User Inputs

output.var = params$output.var 

transform.abs = FALSE
log.pred = params$log.pred
norm.pred = FALSE
algo.forward.caret = params$algo.forward.caret
algo.backward.caret = params$algo.backward.caret
algo.stepwise.caret = params$algo.stepwise.caret
algo.LASSO.caret = params$algo.LASSO.caret
algo.LARS.caret = params$algo.LARS.caret
message("Parameters used for training/prediction: ")
## Parameters used for training/prediction:
str(params)
## List of 7
##  $ output.var         : chr "y3"
##  $ log.pred           : logi TRUE
##  $ algo.forward.caret : logi TRUE
##  $ algo.backward.caret: logi TRUE
##  $ algo.stepwise.caret: logi TRUE
##  $ algo.LASSO.caret   : logi TRUE
##  $ algo.LARS.caret    : logi TRUE
# Setup Labels
output.var.tr = if (log.pred == TRUE)  paste0(output.var,'.log') else  output.var.tr = output.var

Loading Data

feat  = read.csv('../../Data/features_highprec.csv')
labels = read.csv('../../Data/labels.csv')
predictors = names(dplyr::select(feat,-JobName))
data.ori = inner_join(feat,labels,by='JobName')
#data.ori = inner_join(feat,select_at(labels,c('JobName',output.var)),by='JobName')

Data validation

cc  = complete.cases(data.ori)
data.notComplete = data.ori[! cc,]
data = data.ori[cc,] %>% select_at(c(predictors,output.var,'JobName'))
message('Original cases: ',nrow(data.ori))
## Original cases: 10000
message('Non-Complete cases: ',nrow(data.notComplete))
## Non-Complete cases: 3020
message('Complete cases: ',nrow(data))
## Complete cases: 6980
summary(dplyr::select_at(data,c('JobName',output.var)))
##       JobName           y3        
##  Job_00001:   1   Min.   : 95.91  
##  Job_00002:   1   1st Qu.:118.29  
##  Job_00003:   1   Median :124.03  
##  Job_00004:   1   Mean   :125.40  
##  Job_00007:   1   3rd Qu.:131.06  
##  Job_00008:   1   Max.   :193.73  
##  (Other)  :6974

Output Variable

The Output Variable y3 shows right skewness, so will proceed with a log transformation

Histogram

df=gather(select_at(data,output.var))
ggplot(df, aes(x=value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() 

  #stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  

QQPlot

ggplot(gather(select_at(data,output.var)), aes(sample=value)) + 
  stat_qq() + 
  facet_wrap(~key, scales = 'free',ncol=4)

Trasformation of Output Variable from y3 to y3.log

if(log.pred==TRUE) data[[output.var.tr]] = log(data[[output.var]],10) else
  data[[output.var.tr]] = data[[output.var]]
df=gather(select_at(data,c(output.var,output.var.tr)))
ggplot(df, aes(value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() + 
  # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
  facet_wrap(~key, scales = 'free',ncol=2)

ggplot(gather(select_at(data,c(output.var,output.var.tr))), aes(sample=value)) + 
  stat_qq() + 
  facet_wrap(~key, scales = 'free',ncol=4)

Best Normalizator y3

Normalization of y3 using bestNormalize package. (suggested orderNorm) This is cool, but I think is too far for the objective of the project

t=bestNormalize::bestNormalize(data[[output.var]])
t
## Best Normalizing transformation with 6980 Observations
##  Estimated Normality Statistics (Pearson P / df, lower => more normal):
##  - No transform: 2.9491 
##  - Box-Cox: 1.4149 
##  - Log_b(x+a): 1.9707 
##  - sqrt(x+a): 2.385 
##  - exp(x): 749.5512 
##  - arcsinh(x): 1.9705 
##  - Yeo-Johnson: 1.1284 
##  - orderNorm: 1.0815 
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##  
## Based off these, bestNormalize chose:
## orderNorm Transformation with 6980 nonmissing obs and no ties 
##  - Original quantiles:
##      0%     25%     50%     75%    100% 
##  95.913 118.289 124.030 131.059 193.726
qqnorm(data[[output.var]])

qqnorm(predict(t))

orderNorm() is a rank-based procedure by which the values of a vector are mapped to their percentile, which is then mapped to the same percentile of the normal distribution. Without the presence of ties, this essentially guarantees that the transformation leads to a uniform distribution

Predictors

All predictors show a Fat-Tail situation, where the two tails are very tall, and a low distribution around the mean. The orderNorm transformation can help (see [Best Normalizator] section)

Interesting Predictors

Histograms

cols = c('x11','x18','stat98','x7','stat110')
df=gather(select_at(data,cols))
ggplot(df, aes(value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() + 
  # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
  facet_wrap(~key, scales = 'free',ncol=3)

# ggplot(gather(select_at(data,cols)), aes(sample=value)) + 
#   stat_qq()+
#   facet_wrap(~key, scales = 'free',ncol=2)

lapply(select_at(data,cols),summary)
## $x11
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 9.000e-08 9.494e-08 1.001e-07 1.001e-07 1.052e-07 1.100e-07 
## 
## $x18
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.500   3.147   4.769   4.772   6.418   7.999 
## 
## $stat98
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -2.998619 -1.551882 -0.015993 -0.005946  1.528405  2.999499 
## 
## $x7
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.700   1.266   1.854   1.852   2.446   3.000 
## 
## $stat110
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -2.999543 -1.496865 -0.002193 -0.004129  1.504273  2.999563

Scatter plot vs. output variable **y3.log

d = gather(dplyr::select_at(data,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
  geom_point(color='light green',alpha=0.5) + 
  geom_smooth() + 
  facet_wrap(~target, scales = 'free',ncol=3)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

All Predictors

Histograms

All indicators have a strong indication of Fat-Tails

df=gather(select_at(data,predictors))
ggplot(df, aes(value)) + 
  geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
  geom_density() + 
  # stat_function(fun = dnorm, n = 100, args = list(mean = mean(df$value), sd = sd(df$value)))  
  facet_wrap(~key, scales = 'free',ncol=4)

Correlations

With Output Variable

#chart.Correlation(select(data,-JobName),  pch=21)
t=as.data.frame(round(cor(dplyr::select(data,-one_of(output.var.tr,'JobName'))
                          ,select_at(data,output.var.tr)),4))  %>%
  rownames_to_column(var='variable') %>% filter(variable != !!output.var) %>% arrange(-y3.log)
#DT::datatable(t)
message("Top Positive")
## Top Positive
kable(head(arrange(t,desc(y3.log)),20))
variable y3.log
x18 0.3120
x7 0.2091
stat98 0.1784
x9 0.1127
x17 0.0611
x16 0.0489
x10 0.0472
x21 0.0412
x11 0.0322
x8 0.0318
stat156 0.0287
stat23 0.0234
stat100 0.0206
stat144 0.0203
stat59 0.0202
stat60 0.0199
stat195 0.0199
stat141 0.0194
stat73 0.0192
stat197 0.0185
message("Top Negative")
## Top Negative
kable(head(arrange(t,y3.log),20))
variable y3.log
stat110 -0.1594
x4 -0.0603
stat13 -0.0345
stat41 -0.0345
stat14 -0.0317
stat149 -0.0309
stat113 -0.0279
stat4 -0.0248
stat106 -0.0236
stat146 -0.0236
stat186 -0.0217
stat91 -0.0210
stat214 -0.0209
stat5 -0.0207
stat22 -0.0202
stat39 -0.0202
stat175 -0.0194
stat187 -0.0193
stat128 -0.0192
stat37 -0.0191

Between All Variables

#chart.Correlation(select(data,-JobName),  pch=21)
t=as.data.frame(round(cor(dplyr::select(data,-one_of('JobName'))),4))
#DT::datatable(t,options=list(scrollX=T))
message("Showing only 10 variables")
## Showing only 10 variables
kable(t[1:10,1:10])
x1 x2 x3 x4 x5 x6 x7 x8 x9 x10
x1 1.0000 0.0034 -0.0028 0.0085 0.0068 0.0159 0.0264 -0.0012 0.0142 0.0013
x2 0.0034 1.0000 -0.0057 0.0004 -0.0094 -0.0101 0.0089 0.0078 0.0049 -0.0214
x3 -0.0028 -0.0057 1.0000 0.0029 0.0046 0.0006 -0.0105 -0.0002 0.0167 -0.0137
x4 0.0085 0.0004 0.0029 1.0000 -0.0059 0.0104 0.0098 0.0053 0.0061 -0.0023
x5 0.0068 -0.0094 0.0046 -0.0059 1.0000 0.0016 -0.0027 0.0081 0.0259 -0.0081
x6 0.0159 -0.0101 0.0006 0.0104 0.0016 1.0000 0.0200 -0.0157 0.0117 -0.0072
x7 0.0264 0.0089 -0.0105 0.0098 -0.0027 0.0200 1.0000 -0.0018 -0.0069 -0.0221
x8 -0.0012 0.0078 -0.0002 0.0053 0.0081 -0.0157 -0.0018 1.0000 0.0142 -0.0004
x9 0.0142 0.0049 0.0167 0.0061 0.0259 0.0117 -0.0069 0.0142 1.0000 0.0149
x10 0.0013 -0.0214 -0.0137 -0.0023 -0.0081 -0.0072 -0.0221 -0.0004 0.0149 1.0000

Scatter Plots with Output Variable

Scatter plots with all predictors and the output variable (y3.log)

d = gather(dplyr::select_at(data,c(predictors,output.var.tr)),key=target,value=value,-!!output.var.tr)
ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
  geom_point(color='light blue',alpha=0.5) + 
  geom_smooth() + 
  facet_wrap(~target, scales = 'free',ncol=4)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Multicollinearity - VIF

No Multicollinearity among predictors

Showing Top predictor by VIF Value

vifDF = usdm::vif(select_at(data,predictors)) %>% arrange(desc(VIF))
head(vifDF,15)
##    Variables      VIF
## 1    stat145 1.063704
## 2    stat142 1.062756
## 3    stat154 1.061976
## 4      stat1 1.059943
## 5      stat5 1.059813
## 6     stat93 1.059749
## 7     stat32 1.059513
## 8    stat156 1.059133
## 9     stat86 1.058644
## 10    stat14 1.058343
## 11    stat92 1.058224
## 12   stat150 1.058180
## 13   stat127 1.058168
## 14   stat200 1.058121
## 15   stat131 1.058006

Feature Eng

  • Square Root transformation for x18
data.tr=data %>%
  mutate(x18.sqrt = sqrt(x18)) 
cols=c('x18','x18.sqrt')

Comparing Pre and Post Transformation Density Plots

# ggplot(gather(select_at(data.tr,cols)), aes(value)) + 
#   geom_histogram(aes(y=..density..),bins = 50,fill='light blue') + 
#   geom_density() + 
#   facet_wrap(~key, scales = 'free',ncol=4)

d = gather(dplyr::select_at(data.tr,c(cols,output.var.tr)),key=target,value=value,-!!output.var.tr)
ggplot(data=d, aes_string(x='value',y=output.var.tr)) + 
  geom_point(color='light blue',alpha=0.5) + 
  geom_smooth() + 
  facet_wrap(~target, scales = 'free',ncol=4)
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

#removing unwanted variables
data.tr=data.tr %>%
  dplyr::select_at(names(data.tr)[! names(data.tr) %in% c('x18','y3','JobName')])

data=data.tr
label.names=output.var.tr

Modeling

PCA

fullInteraction=TRUE

pca.vars  = names(data)
pca.vars = pca.vars[!pca.vars %in% label.names]

if(fullInteraction){
  pca.formula =as.formula(paste0('~(',paste0(pca.vars, collapse ='+'),')^2'))
  pca.model = prcomp(formula=pca.formula,data=data[,pca.vars],center=T,scale.=T,retx = T)
  #saveRDS(pca.model,'pca.model.rds')
} else {
  pca.model =  prcomp(x=data[,pca.vars],center=T,scale.=T,retx = T)
}
targetCumVar = .8

pca.model$var = pca.model$sdev ^ 2 #eigenvalues
pca.model$pvar = pca.model$var / sum(pca.model$var)
pca.model$cumpvar = cumsum(pca.model$pvar )
pca.model$pcaSel = pca.model$cumpvar<=targetCumVar
pca.model$pcaSelCount = sum(pca.model$pcaSel)
pca.model$pcaSelTotVar = sum(pca.model$pvar[pca.model$pcaSel])
message(pca.model$pcaSelCount, " PCAs justify ",percent(targetCumVar)," of the total Variance. (",percent(pca.model$pcaSelTotVar),")")
## 3834 PCAs justify 80.0% of the total Variance. (80.0%)
plot(pca.model$var,xlab="Principal component", ylab="Proportion of variance explained",   type='b')

plot(cumsum(pca.model$pvar ),xlab="Principal component", ylab="Cumulative Proportion of variance explained", ylim=c(0,1), type='b')

screeplot(pca.model,npcs = pca.model$pcaSelCount)

screeplot(pca.model,npcs = pca.model$pcaSelCount,type='lines')

#summary(pca.model)
#pca.model$rotation
#creating dataset
data.pca = dplyr::select(data,!!label.names) %>% 
  dplyr::bind_cols(dplyr::select(as.data.frame(pca.model$x)
                                 ,!!colnames(pca.model$rotation)[pca.model$pcaSel])
  )

Train Test Split

data.pca = data[sample(nrow(data.pca)),] # randomly shuffle data
split = sample.split(data.pca[,label.names], SplitRatio = 0.8)

data.train = subset(data.pca, split == TRUE)
data.test = subset(data.pca, split == FALSE)

Common Functions

plot.diagnostics <-  function(model, train) {
  plot(model)
  
  residuals = resid(model) # Plotted above in plot(lm.out)
  r.standard = rstandard(model)
  r.student = rstudent(model)
  
  df = data.frame(x=predict(model,train),y=r.student)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = 0,size=1)+
    ylab("Student Residuals") +
    xlab("Predicted Values")+
    ggtitle("Student Residual Plot")
  plot(p)
  
  df = data.frame(x=predict(model,train),y=r.standard)
  p=ggplot(data=df,aes(x=x,y=y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_hline(yintercept = c(-2,0,2),size=1)+
    ylab("Student Residuals") +
    xlab("Predicted Values")+
    ggtitle("Student Residual Plot")
  plot(p)
  # Histogram
  df=data.frame(r.student)
  p=ggplot(data=df,aes(r.student)) +
    geom_histogram(aes(y=..density..),bins = 50,fill='blue',alpha=0.6) + 
    stat_function(fun = dnorm, n = 100, args = list(mean = 0, sd = 1)) +
    ylab("Density")+
    xlab("Studentized Residuals")+
    ggtitle("Distribution of Studentized Residuals")
  plot(p)
  # http://www.stat.columbia.edu/~martin/W2024/R7.pdf
  # Influential plots
  inf.meas = influence.measures(model)
  # print (summary(inf.meas)) # too much data
  
  # Leverage plot
  lev = hat(model.matrix(model))
  df=tibble::rownames_to_column(as.data.frame(lev),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=lev)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    ylab('Leverage - check') + 
    xlab('Index')
  plot(p)
  # Cook's Distance
  cd = cooks.distance(model)
  df=tibble::rownames_to_column(as.data.frame(cd),'id')
  p=ggplot(data=df,aes(x=as.numeric(id),y=cd)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_text(data=filter(df,cd>15/nrow(train)),aes(label=id),check_overlap=T,size=3,vjust=-.5)+
    ylab('Cooks distances') + 
    geom_hline(yintercept = c(4/nrow(train),0),size=1)+
    xlab('Index')
  plot(p)
  print (paste("Number of data points that have Cook's D > 4/n: ", length(cd[cd > 4/nrow(train)]), sep = "")) 
  print (paste("Number of data points that have Cook's D > 1: ", length(cd[cd > 1]), sep = "")) 
  return(cd)
}

# function to set up random seeds
# Based on http://jaehyeon-kim.github.io/2015/05/Setup-Random-Seeds-on-Caret-Package.html 
setCaretSeeds <- function(method = "cv", numbers = 1, repeats = 1, tunes = NULL, seed = 1701) {
  #B is the number of resamples and integer vector of M (numbers + tune length if any)
  B <- if (method == "cv") numbers
  else if(method == "repeatedcv") numbers * repeats
  else NULL
  if(is.null(length)) {
    seeds <- NULL
  } else {
    set.seed(seed = seed)
    seeds <- vector(mode = "list", length = B)
    seeds <- lapply(seeds, function(x) sample.int(n = 1000000
                                                  , size = numbers + ifelse(is.null(tunes), 0, tunes)))
    seeds[[length(seeds) + 1]] <- sample.int(n = 1000000, size = 1)
  }
  # return seeds
  seeds
}



train.caret.glmselect = function(formula, data, method
                                 ,subopt = NULL, feature.names
                                 , train.control = NULL, tune.grid = NULL, pre.proc = NULL){
  
  if(is.null(train.control)){
    train.control <- trainControl(method = "cv"
                              ,number = 10
                              ,seeds = setCaretSeeds(method = "cv"
                                                     , numbers = 10
                                                     , seed = 1701)
                              ,search = "grid"
                              ,verboseIter = TRUE
                              ,allowParallel = TRUE
                              )
  }
  
  if(is.null(tune.grid)){
    if (method == 'leapForward' | method == 'leapBackward' | method == 'leapSeq'){
      tune.grid = data.frame(nvmax = 1:length(feature.names))
    }
    if (method == 'glmnet' && subopt == 'LASSO'){
      # Will only show 1 Lambda value during training, but that is OK
      # https://stackoverflow.com/questions/47526544/why-need-to-tune-lambda-with-carettrain-method-glmnet-and-cv-glmnet
      # Another option for LASSO is this: https://github.com/topepo/caret/blob/master/RegressionTests/Code/lasso.R
      lambda = 10^seq(-2,0, length =100)
      alpha = c(1)
      tune.grid = expand.grid(alpha = alpha,lambda = lambda)
    }
    if (method == 'lars'){
      # https://github.com/topepo/caret/blob/master/RegressionTests/Code/lars.R
      fraction = seq(0, 1, length = 100)
      tune.grid = expand.grid(fraction = fraction)
      pre.proc = c("center", "scale") 
    }
  }
  
  # http://sshaikh.org/2015/05/06/parallelize-machine-learning-in-r-with-multi-core-cpus/
  cl <- makeCluster(ceiling(detectCores()*0.85)) # use 75% of cores only, leave rest for other tasks
  registerDoParallel(cl)

  set.seed(1) 
  # note that the seed has to actually be set just before this function is called
  # settign is above just not ensure reproducibility for some reason
  model.caret <- caret::train(formula
                              , data = data
                              , method = method
                              , tuneGrid = tune.grid
                              , trControl = train.control
                              , preProc = pre.proc
                              )
  
  stopCluster(cl)
  registerDoSEQ() # register sequential engine in case you are not using this function anymore
  
  if (method == 'leapForward' | method == 'leapBackward' | method == 'leapSeq'){
    print("All models results")
    print(model.caret$results) # all model results
    print("Best Model")
    print(model.caret$bestTune) # best model
    model = model.caret$finalModel

    # Metrics Plot 
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-nvmax) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=nvmax,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot
    # leap function does not support studentized residuals
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)
   
    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') + 
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4')  
      theme_light()
    plot(residHistogram)
    id = rownames(model.caret$bestTune)    
    # Provides the coefficients of the best model
    # regsubsets doens return a full model (see documentation of regsubset), so we need to recalcualte themodel
    # https://stackoverflow.com/questions/13063762/how-to-obtain-a-lm-object-from-regsubsets
    print("Coefficients of final model:")
    coefs <- coef(model, id=id)
    #calculate the model to the the coef intervals
    nams <- names(coefs)
    nams <- nams[!nams %in% "(Intercept)"]
    response <-  as.character(formula[[2]])
    form <- as.formula(paste(response, paste(nams, collapse = " + "), sep = " ~ "))
    mod <- lm(form, data = data)
    #coefs
    #coef(mod)
    print(car::Confint(mod))
    return(list(model = model,id = id, residPlot = residPlot, residHistogram=residHistogram
                ,modelLM=mod))
  }
  if (method == 'glmnet' && subopt == 'LASSO'){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    
    print(model.caret$results)
    model=model.caret$finalModel
    # Metrics Plot 
    dataPlot = model.caret$results %>%
      gather(key='metric',value='value',-lambda) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=lambda,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot 
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)

    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') +
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4')  
      theme_light()
    plot(residHistogram)
    
    print("Coefficients") 
    #no interval for glmnet: https://stackoverflow.com/questions/39750965/confidence-intervals-for-ridge-regression
    t=coef(model,s=model.caret$bestTune$lambda)
    model.coef = t[which(t[,1]!=0),]
    print(as.data.frame(model.coef))
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret,id = id, residPlot = residPlot, metricsPlot=metricsPlot ))
  }
  if (method == 'lars'){
    print(model.caret)
    print(plot(model.caret))
    print(model.caret$bestTune)
    
    # Metrics Plot
    dataPlot = model.caret$results %>%
        gather(key='metric',value='value',-fraction) %>%
      dplyr::filter(metric %in% c('MAE','RMSE','Rsquared'))
    metricsPlot = ggplot(data=dataPlot,aes(x=fraction,y=value) ) +
      geom_line(color='lightblue4') +
      geom_point(color='blue',alpha=0.7,size=.9) +
      facet_wrap(~metric,ncol=2,scales='free_y')+
      theme_light()
    plot(metricsPlot)
    
    # Residuals Plot
    dataPlot=data.frame(pred=predict(model.caret,data),res=resid(model.caret))
    residPlot = ggplot(dataPlot,aes(x=pred,y=res)) +
      geom_point(color='light blue',alpha=0.7) +
      geom_smooth(method="lm")+
      theme_light()
    plot(residPlot)

    residHistogram = ggplot(dataPlot,aes(x=res)) +
      geom_histogram(aes(y=..density..),fill='light blue',alpha=1) +
      #geom_density(color='lightblue4') + 
      stat_function(fun = dnorm, n = 100, args = list(mean = mean(dataPlot$res)
                                                       , sd = sd(dataPlot$res)),color='lightblue4')  
      theme_light()
    plot(residHistogram)
    
    print("Coefficients") 
    t=coef(model.caret$finalModel,s=model.caret$bestTune$fraction,mode='fraction')
    model.coef = t[which(t!=0)]
    print(model.coef)
    id = NULL # not really needed but added for consistency
    return(list(model = model.caret,id = id, residPlot = residPlot, residHistogram=residHistogram))
  }
}

# https://stackoverflow.com/questions/48265743/linear-model-subset-selection-goodness-of-fit-with-k-fold-cross-validation
# changed slightly since call[[2]] was just returning "formula" without actually returnign the value in formula
predict.regsubsets <- function(object, newdata, id, formula, ...) {
    #form <- as.formula(object$call[[2]])
    mat <- model.matrix(formula, newdata) # adds intercept and expands any interaction terms
    coefi <- coef(object, id = id)
    xvars <- names(coefi)
    return(mat[,xvars]%*%coefi)
}
  
test.model = function(model, test, level=0.95
                      ,draw.limits = FALSE, good = 0.1, ok = 0.15
                      ,method = NULL, subopt = NULL
                      ,id = NULL, formula, feature.names, label.names
                      ,transformation = NULL){
  ## if using caret for glm select equivalent functionality, 
  ## need to pass formula (full is ok as it will select subset of variables from there)
  if (is.null(method)){
    pred = predict(model, newdata=test, interval="confidence", level = level) 
  }
  
  if (method == 'leapForward' | method == 'leapBackward' | method == 'leapSeq'){
    pred = predict.regsubsets(model, newdata = test, id = id, formula = formula)
  }
  
  if (method == 'glmnet' && subopt == 'LASSO'){
    xtest = as.matrix(test[,feature.names]) 
    pred=as.data.frame(predict(model, xtest))
  }
  
  if (method == 'lars'){
    pred=as.data.frame(predict(model, newdata = test))
  }
    
  # Summary of predicted values
  print ("Summary of predicted values: ")
  print(summary(pred[,1]))

  test.mse = mean((test[,label.names]-pred[,1])^2)
  print (paste(method, subopt, "Test MSE:", test.mse, sep=" "))
  
  if(log.pred == TRUE || norm.pred == TRUE){
    # plot transformewd comparison first
    df=data.frame(x=test[,label.names],y=pred[,1])
    ggplot(df,aes(x=x,y=y)) +
      geom_point(color='blue',alpha=0.5,shape=20,size=2) +
      geom_abline(slope=1,intercept=0,color='black',size=1) +
      #scale_y_continuous(limits=c(min(df),max(df)))+
      xlab("Actual (Transformed)")+
      ylab("Predicted (Transformed)")
  }
    
  if (log.pred == FALSE && norm.pred == FALSE){
    x = test[,label.names]
    y = pred[,1]
  }
  if (log.pred == TRUE){
    x = 10^test[,label.names]
    y = 10^pred[,1]  
  }
  if (norm.pred == TRUE){
    x = predict(transformation, test[,label.names], inverse = TRUE)
    y = predict(transformation, pred[,1], inverse = TRUE)
  }

  df=data.frame(x,y)
  ggplot(df,aes(x,y)) +
    geom_point(color='blue',alpha=0.5,shape=20,size=2) +
    geom_abline(slope=c(1+good,1-good,1+ok,1-ok)
                ,intercept=rep(0,4),color=c('dark green','dark green','dark red','dark red'),size=1,alpha=0.8) +
    #scale_y_continuous(limits=c(min(df),max(df)))+
    xlab("Actual")+
    ylab("Predicted") 
    
 
}

Setup Formulae

n <- names(data.train)
 formula <- as.formula(paste(paste(n[n %in% label.names], collapse = " + ")
                             ," ~", paste(n[!n %in% label.names], collapse = " + "))) 

grand.mean.formula = as.formula(paste(paste(n[n %in% label.names], collapse = " + ")," ~ 1"))

print(formula)
## y3.log ~ x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + 
##     x12 + x13 + x14 + x15 + x16 + x17 + x19 + x20 + x21 + x22 + 
##     x23 + stat1 + stat2 + stat3 + stat4 + stat5 + stat6 + stat7 + 
##     stat8 + stat9 + stat10 + stat11 + stat12 + stat13 + stat14 + 
##     stat15 + stat16 + stat17 + stat18 + stat19 + stat20 + stat21 + 
##     stat22 + stat23 + stat24 + stat25 + stat26 + stat27 + stat28 + 
##     stat29 + stat30 + stat31 + stat32 + stat33 + stat34 + stat35 + 
##     stat36 + stat37 + stat38 + stat39 + stat40 + stat41 + stat42 + 
##     stat43 + stat44 + stat45 + stat46 + stat47 + stat48 + stat49 + 
##     stat50 + stat51 + stat52 + stat53 + stat54 + stat55 + stat56 + 
##     stat57 + stat58 + stat59 + stat60 + stat61 + stat62 + stat63 + 
##     stat64 + stat65 + stat66 + stat67 + stat68 + stat69 + stat70 + 
##     stat71 + stat72 + stat73 + stat74 + stat75 + stat76 + stat77 + 
##     stat78 + stat79 + stat80 + stat81 + stat82 + stat83 + stat84 + 
##     stat85 + stat86 + stat87 + stat88 + stat89 + stat90 + stat91 + 
##     stat92 + stat93 + stat94 + stat95 + stat96 + stat97 + stat98 + 
##     stat99 + stat100 + stat101 + stat102 + stat103 + stat104 + 
##     stat105 + stat106 + stat107 + stat108 + stat109 + stat110 + 
##     stat111 + stat112 + stat113 + stat114 + stat115 + stat116 + 
##     stat117 + stat118 + stat119 + stat120 + stat121 + stat122 + 
##     stat123 + stat124 + stat125 + stat126 + stat127 + stat128 + 
##     stat129 + stat130 + stat131 + stat132 + stat133 + stat134 + 
##     stat135 + stat136 + stat137 + stat138 + stat139 + stat140 + 
##     stat141 + stat142 + stat143 + stat144 + stat145 + stat146 + 
##     stat147 + stat148 + stat149 + stat150 + stat151 + stat152 + 
##     stat153 + stat154 + stat155 + stat156 + stat157 + stat158 + 
##     stat159 + stat160 + stat161 + stat162 + stat163 + stat164 + 
##     stat165 + stat166 + stat167 + stat168 + stat169 + stat170 + 
##     stat171 + stat172 + stat173 + stat174 + stat175 + stat176 + 
##     stat177 + stat178 + stat179 + stat180 + stat181 + stat182 + 
##     stat183 + stat184 + stat185 + stat186 + stat187 + stat188 + 
##     stat189 + stat190 + stat191 + stat192 + stat193 + stat194 + 
##     stat195 + stat196 + stat197 + stat198 + stat199 + stat200 + 
##     stat201 + stat202 + stat203 + stat204 + stat205 + stat206 + 
##     stat207 + stat208 + stat209 + stat210 + stat211 + stat212 + 
##     stat213 + stat214 + stat215 + stat216 + stat217 + x18.sqrt
print(grand.mean.formula)
## y3.log ~ 1
# Update feature.names because we may have transformed some features
feature.names = n[!n %in% label.names]

Full Model

model.full = lm(formula , data.train)
summary(model.full)
## 
## Call:
## lm(formula = formula, data = data.train)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.082957 -0.020817 -0.004661  0.016249  0.160450 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.967e+00  9.510e-03 206.834  < 2e-16 ***
## x1          -3.335e-04  6.533e-04  -0.510  0.60976    
## x2           3.317e-04  4.148e-04   0.800  0.42402    
## x3           5.063e-05  1.150e-04   0.440  0.65965    
## x4          -4.629e-05  8.995e-06  -5.146 2.76e-07 ***
## x5           1.793e-04  2.944e-04   0.609  0.54252    
## x6           1.391e-04  5.935e-04   0.234  0.81475    
## x7           1.104e-02  6.382e-04  17.300  < 2e-16 ***
## x8           4.382e-04  1.489e-04   2.944  0.00326 ** 
## x9           3.070e-03  3.306e-04   9.288  < 2e-16 ***
## x10          1.223e-03  3.082e-04   3.967 7.38e-05 ***
## x11          2.028e+05  7.358e+04   2.756  0.00586 ** 
## x12          1.078e-04  1.883e-04   0.573  0.56682    
## x13          3.925e-05  7.480e-05   0.525  0.59973    
## x14         -4.059e-04  3.243e-04  -1.252  0.21066    
## x15          1.204e-04  3.074e-04   0.392  0.69526    
## x16          1.021e-03  2.136e-04   4.780 1.80e-06 ***
## x17          1.858e-03  3.249e-04   5.718 1.14e-08 ***
## x19          1.350e-04  1.650e-04   0.818  0.41329    
## x20         -9.016e-04  1.151e-03  -0.783  0.43339    
## x21          1.082e-04  4.211e-05   2.568  0.01024 *  
## x22         -4.039e-04  3.467e-04  -1.165  0.24416    
## x23          2.964e-04  3.288e-04   0.901  0.36746    
## stat1       -1.579e-04  2.482e-04  -0.636  0.52475    
## stat2        1.423e-04  2.458e-04   0.579  0.56262    
## stat3        4.660e-04  2.484e-04   1.876  0.06073 .  
## stat4       -5.826e-04  2.492e-04  -2.338  0.01944 *  
## stat5       -1.030e-04  2.478e-04  -0.416  0.67778    
## stat6       -3.683e-05  2.481e-04  -0.148  0.88201    
## stat7       -1.612e-04  2.477e-04  -0.651  0.51522    
## stat8        2.214e-04  2.476e-04   0.894  0.37132    
## stat9       -2.382e-04  2.472e-04  -0.964  0.33524    
## stat10      -3.094e-04  2.481e-04  -1.247  0.21242    
## stat11      -9.168e-06  2.494e-04  -0.037  0.97067    
## stat12       2.652e-04  2.469e-04   1.074  0.28287    
## stat13      -6.644e-04  2.465e-04  -2.695  0.00705 ** 
## stat14      -7.599e-04  2.461e-04  -3.088  0.00202 ** 
## stat15      -2.643e-04  2.453e-04  -1.077  0.28138    
## stat16       8.957e-05  2.470e-04   0.363  0.71688    
## stat17       7.868e-06  2.458e-04   0.032  0.97446    
## stat18      -3.596e-04  2.479e-04  -1.451  0.14687    
## stat19       1.908e-05  2.467e-04   0.077  0.93836    
## stat20      -1.633e-04  2.487e-04  -0.657  0.51153    
## stat21      -3.260e-05  2.481e-04  -0.131  0.89549    
## stat22      -5.024e-04  2.488e-04  -2.020  0.04347 *  
## stat23       5.723e-04  2.474e-04   2.313  0.02075 *  
## stat24      -5.039e-04  2.483e-04  -2.030  0.04243 *  
## stat25      -6.027e-04  2.470e-04  -2.440  0.01474 *  
## stat26      -3.665e-04  2.469e-04  -1.484  0.13785    
## stat27       3.341e-05  2.483e-04   0.135  0.89298    
## stat28       5.207e-05  2.482e-04   0.210  0.83381    
## stat29       1.261e-04  2.494e-04   0.506  0.61317    
## stat30       2.392e-04  2.490e-04   0.961  0.33663    
## stat31       2.850e-05  2.496e-04   0.114  0.90910    
## stat32       2.199e-04  2.485e-04   0.885  0.37617    
## stat33      -4.095e-04  2.479e-04  -1.652  0.09863 .  
## stat34      -5.030e-05  2.472e-04  -0.203  0.83879    
## stat35      -3.877e-04  2.475e-04  -1.566  0.11730    
## stat36      -6.788e-05  2.456e-04  -0.276  0.78226    
## stat37      -2.305e-04  2.496e-04  -0.923  0.35586    
## stat38       3.643e-04  2.469e-04   1.476  0.14013    
## stat39      -2.239e-04  2.453e-04  -0.913  0.36134    
## stat40       4.679e-05  2.479e-04   0.189  0.85029    
## stat41      -6.082e-04  2.456e-04  -2.476  0.01332 *  
## stat42      -2.666e-04  2.462e-04  -1.083  0.27898    
## stat43      -1.332e-04  2.484e-04  -0.536  0.59188    
## stat44       5.118e-05  2.477e-04   0.207  0.83631    
## stat45      -4.018e-04  2.471e-04  -1.626  0.10395    
## stat46       2.959e-04  2.475e-04   1.195  0.23197    
## stat47      -1.944e-04  2.505e-04  -0.776  0.43771    
## stat48       3.389e-04  2.477e-04   1.369  0.17119    
## stat49       1.543e-04  2.466e-04   0.626  0.53153    
## stat50       2.624e-04  2.446e-04   1.073  0.28348    
## stat51       2.551e-04  2.476e-04   1.030  0.30298    
## stat52       1.664e-04  2.482e-04   0.671  0.50245    
## stat53      -1.117e-04  2.501e-04  -0.447  0.65505    
## stat54      -3.039e-04  2.490e-04  -1.220  0.22236    
## stat55       1.464e-04  2.454e-04   0.596  0.55093    
## stat56      -1.606e-04  2.481e-04  -0.647  0.51750    
## stat57      -1.618e-04  2.455e-04  -0.659  0.51001    
## stat58       1.598e-04  2.468e-04   0.647  0.51743    
## stat59       3.296e-04  2.471e-04   1.334  0.18235    
## stat60       4.065e-04  2.479e-04   1.640  0.10116    
## stat61      -1.628e-04  2.480e-04  -0.656  0.51163    
## stat62      -2.854e-04  2.469e-04  -1.156  0.24775    
## stat63      -1.058e-05  2.476e-04  -0.043  0.96591    
## stat64      -2.240e-04  2.461e-04  -0.910  0.36272    
## stat65      -6.171e-04  2.502e-04  -2.467  0.01367 *  
## stat66       8.904e-05  2.503e-04   0.356  0.72210    
## stat67       1.662e-04  2.492e-04   0.667  0.50490    
## stat68      -7.448e-05  2.488e-04  -0.299  0.76471    
## stat69      -1.223e-04  2.482e-04  -0.493  0.62238    
## stat70       4.184e-04  2.460e-04   1.701  0.08904 .  
## stat71      -9.952e-07  2.459e-04  -0.004  0.99677    
## stat72       1.125e-04  2.486e-04   0.453  0.65091    
## stat73       3.657e-04  2.488e-04   1.470  0.14155    
## stat74       1.594e-04  2.480e-04   0.643  0.52038    
## stat75      -1.272e-04  2.505e-04  -0.508  0.61164    
## stat76       5.902e-05  2.487e-04   0.237  0.81242    
## stat77      -1.312e-04  2.475e-04  -0.530  0.59606    
## stat78      -1.895e-04  2.473e-04  -0.766  0.44358    
## stat79      -1.691e-04  2.467e-04  -0.685  0.49316    
## stat80       1.611e-04  2.485e-04   0.648  0.51684    
## stat81       9.758e-05  2.479e-04   0.394  0.69383    
## stat82       1.953e-04  2.477e-04   0.788  0.43058    
## stat83       1.436e-04  2.477e-04   0.580  0.56202    
## stat84      -9.901e-05  2.476e-04  -0.400  0.68925    
## stat85       7.838e-05  2.472e-04   0.317  0.75123    
## stat86       4.018e-04  2.473e-04   1.625  0.10421    
## stat87      -1.974e-04  2.491e-04  -0.793  0.42810    
## stat88      -2.313e-04  2.454e-04  -0.942  0.34604    
## stat89      -3.285e-04  2.451e-04  -1.340  0.18027    
## stat90      -2.817e-04  2.483e-04  -1.134  0.25678    
## stat91      -4.300e-04  2.467e-04  -1.743  0.08140 .  
## stat92      -2.285e-04  2.480e-04  -0.921  0.35694    
## stat93      -9.587e-05  2.502e-04  -0.383  0.70155    
## stat94      -2.673e-05  2.466e-04  -0.108  0.91369    
## stat95      -1.052e-04  2.485e-04  -0.423  0.67213    
## stat96      -3.504e-04  2.460e-04  -1.425  0.15433    
## stat97      -2.797e-05  2.461e-04  -0.114  0.90954    
## stat98       3.531e-03  2.445e-04  14.442  < 2e-16 ***
## stat99       4.495e-05  2.489e-04   0.181  0.85667    
## stat100      4.922e-04  2.481e-04   1.984  0.04730 *  
## stat101     -7.072e-05  2.495e-04  -0.283  0.77685    
## stat102      1.614e-04  2.491e-04   0.648  0.51704    
## stat103     -7.755e-06  2.503e-04  -0.031  0.97529    
## stat104     -1.423e-04  2.476e-04  -0.575  0.56546    
## stat105      3.005e-04  2.452e-04   1.226  0.22037    
## stat106     -6.922e-05  2.468e-04  -0.280  0.77914    
## stat107     -4.644e-05  2.472e-04  -0.188  0.85099    
## stat108     -2.835e-04  2.473e-04  -1.147  0.25158    
## stat109     -2.545e-05  2.467e-04  -0.103  0.91785    
## stat110     -3.475e-03  2.483e-04 -13.995  < 2e-16 ***
## stat111      9.508e-06  2.466e-04   0.039  0.96925    
## stat112     -2.202e-05  2.501e-04  -0.088  0.92984    
## stat113     -1.389e-04  2.481e-04  -0.560  0.57572    
## stat114      4.755e-05  2.466e-04   0.193  0.84712    
## stat115      1.458e-04  2.473e-04   0.590  0.55546    
## stat116      2.875e-04  2.489e-04   1.155  0.24810    
## stat117      1.622e-04  2.500e-04   0.649  0.51664    
## stat118     -2.623e-04  2.457e-04  -1.068  0.28578    
## stat119      6.218e-05  2.479e-04   0.251  0.80200    
## stat120      8.084e-05  2.478e-04   0.326  0.74428    
## stat121     -2.385e-04  2.485e-04  -0.960  0.33708    
## stat122     -5.800e-06  2.456e-04  -0.024  0.98116    
## stat123     -2.556e-05  2.500e-04  -0.102  0.91857    
## stat124     -7.468e-06  2.485e-04  -0.030  0.97603    
## stat125      1.178e-04  2.485e-04   0.474  0.63555    
## stat126      1.513e-04  2.465e-04   0.614  0.53925    
## stat127      2.473e-04  2.461e-04   1.005  0.31487    
## stat128     -1.034e-04  2.474e-04  -0.418  0.67610    
## stat129      5.386e-05  2.457e-04   0.219  0.82651    
## stat130      4.596e-04  2.462e-04   1.867  0.06192 .  
## stat131     -9.985e-06  2.482e-04  -0.040  0.96791    
## stat132     -5.464e-05  2.456e-04  -0.223  0.82391    
## stat133      6.713e-05  2.469e-04   0.272  0.78570    
## stat134     -2.619e-04  2.460e-04  -1.065  0.28703    
## stat135     -6.020e-06  2.482e-04  -0.024  0.98065    
## stat136     -2.823e-04  2.496e-04  -1.131  0.25806    
## stat137      5.450e-05  2.453e-04   0.222  0.82417    
## stat138      3.746e-05  2.467e-04   0.152  0.87932    
## stat139      1.098e-04  2.489e-04   0.441  0.65915    
## stat140      9.217e-05  2.462e-04   0.374  0.70816    
## stat141      2.671e-04  2.464e-04   1.084  0.27851    
## stat142     -1.296e-04  2.497e-04  -0.519  0.60371    
## stat143      3.722e-05  2.476e-04   0.150  0.88053    
## stat144      5.702e-04  2.462e-04   2.316  0.02057 *  
## stat145     -6.333e-05  2.509e-04  -0.252  0.80077    
## stat146     -5.783e-04  2.479e-04  -2.332  0.01971 *  
## stat147     -3.060e-04  2.489e-04  -1.229  0.21897    
## stat148     -3.749e-04  2.457e-04  -1.526  0.12713    
## stat149     -4.942e-04  2.489e-04  -1.986  0.04711 *  
## stat150      6.109e-05  2.487e-04   0.246  0.80598    
## stat151     -3.107e-04  2.499e-04  -1.244  0.21367    
## stat152     -1.324e-04  2.480e-04  -0.534  0.59359    
## stat153      2.898e-05  2.508e-04   0.116  0.90800    
## stat154     -4.313e-06  2.491e-04  -0.017  0.98619    
## stat155      4.102e-05  2.466e-04   0.166  0.86786    
## stat156      4.230e-04  2.494e-04   1.696  0.08998 .  
## stat157     -1.637e-05  2.467e-04  -0.066  0.94709    
## stat158     -3.325e-04  2.511e-04  -1.324  0.18548    
## stat159      1.243e-04  2.460e-04   0.505  0.61331    
## stat160      1.515e-04  2.491e-04   0.608  0.54309    
## stat161      1.421e-04  2.484e-04   0.572  0.56724    
## stat162      1.916e-04  2.448e-04   0.783  0.43388    
## stat163      1.102e-04  2.499e-04   0.441  0.65927    
## stat164      6.529e-05  2.499e-04   0.261  0.79392    
## stat165      6.332e-05  2.461e-04   0.257  0.79698    
## stat166     -3.184e-04  2.448e-04  -1.301  0.19340    
## stat167     -1.192e-04  2.467e-04  -0.483  0.62888    
## stat168      1.678e-06  2.470e-04   0.007  0.99458    
## stat169     -8.484e-05  2.484e-04  -0.342  0.73269    
## stat170     -6.142e-06  2.472e-04  -0.025  0.98018    
## stat171      4.180e-07  2.512e-04   0.002  0.99867    
## stat172      2.706e-04  2.460e-04   1.100  0.27145    
## stat173     -1.686e-04  2.494e-04  -0.676  0.49893    
## stat174     -8.917e-05  2.478e-04  -0.360  0.71900    
## stat175     -5.480e-04  2.485e-04  -2.205  0.02750 *  
## stat176      1.059e-04  2.480e-04   0.427  0.66930    
## stat177     -1.307e-04  2.481e-04  -0.527  0.59848    
## stat178     -1.303e-05  2.501e-04  -0.052  0.95845    
## stat179      2.615e-04  2.480e-04   1.054  0.29179    
## stat180     -5.409e-04  2.460e-04  -2.199  0.02791 *  
## stat181      2.874e-04  2.481e-04   1.158  0.24674    
## stat182     -7.420e-05  2.488e-04  -0.298  0.76557    
## stat183     -1.106e-04  2.475e-04  -0.447  0.65501    
## stat184      5.259e-05  2.484e-04   0.212  0.83235    
## stat185     -2.140e-04  2.443e-04  -0.876  0.38107    
## stat186     -1.500e-04  2.488e-04  -0.603  0.54672    
## stat187     -2.712e-04  2.474e-04  -1.096  0.27302    
## stat188     -2.179e-04  2.475e-04  -0.880  0.37868    
## stat189      5.510e-05  2.474e-04   0.223  0.82378    
## stat190     -1.139e-04  2.480e-04  -0.459  0.64602    
## stat191     -3.396e-04  2.485e-04  -1.367  0.17180    
## stat192      1.112e-04  2.496e-04   0.446  0.65586    
## stat193     -2.441e-04  2.499e-04  -0.977  0.32868    
## stat194      2.095e-04  2.469e-04   0.848  0.39622    
## stat195      3.117e-04  2.465e-04   1.265  0.20599    
## stat196     -6.684e-05  2.509e-04  -0.266  0.78994    
## stat197      2.593e-04  2.454e-04   1.057  0.29071    
## stat198     -3.316e-04  2.485e-04  -1.335  0.18202    
## stat199      1.953e-04  2.461e-04   0.794  0.42748    
## stat200     -2.023e-04  2.435e-04  -0.831  0.40612    
## stat201     -8.103e-05  2.473e-04  -0.328  0.74322    
## stat202     -3.188e-04  2.524e-04  -1.263  0.20665    
## stat203      1.211e-04  2.467e-04   0.491  0.62341    
## stat204     -5.996e-04  2.458e-04  -2.439  0.01476 *  
## stat205     -2.209e-04  2.468e-04  -0.895  0.37079    
## stat206     -1.432e-04  2.505e-04  -0.572  0.56751    
## stat207      4.225e-04  2.479e-04   1.704  0.08844 .  
## stat208      1.188e-04  2.468e-04   0.481  0.63036    
## stat209     -1.294e-04  2.468e-04  -0.524  0.60004    
## stat210     -7.913e-05  2.483e-04  -0.319  0.74997    
## stat211     -4.470e-05  2.489e-04  -0.180  0.85752    
## stat212      2.229e-05  2.475e-04   0.090  0.92824    
## stat213     -3.187e-04  2.496e-04  -1.277  0.20165    
## stat214     -3.946e-04  2.472e-04  -1.597  0.11041    
## stat215     -2.788e-04  2.474e-04  -1.127  0.25983    
## stat216     -1.133e-04  2.481e-04  -0.457  0.64779    
## stat217      2.681e-04  2.483e-04   1.080  0.28037    
## x18.sqrt     2.587e-02  9.422e-04  27.461  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03136 on 5343 degrees of freedom
## Multiple R-squared:  0.2678, Adjusted R-squared:  0.235 
## F-statistic: 8.144 on 240 and 5343 DF,  p-value: < 2.2e-16
cd.full = plot.diagnostics(model=model.full, train=data.train)

## [1] "Number of data points that have Cook's D > 4/n: 292"
## [1] "Number of data points that have Cook's D > 1: 0"

Checking with removal of high influence points

high.cd = names(cd.full[cd.full > 4/nrow(data.train)])

#save dataset with high.cd flagged
t = data.train %>% 
  rownames_to_column() %>%
  mutate(high.cd = ifelse(rowname %in% high.cd,1,0))
#write.csv(t,file='data_high_cd_flag.csv',row.names = F)
###
data.train2 = data.train[!(rownames(data.train)) %in% high.cd,]
model.full2 = lm(formula , data.train2)
summary(model.full2)
## 
## Call:
## lm(formula = formula, data = data.train2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.057402 -0.017407 -0.002489  0.016456  0.069740 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.958e+00  7.793e-03 251.224  < 2e-16 ***
## x1          -4.485e-04  5.344e-04  -0.839 0.401415    
## x2           4.589e-04  3.391e-04   1.354 0.175951    
## x3          -3.649e-05  9.361e-05  -0.390 0.696695    
## x4          -4.964e-05  7.376e-06  -6.730 1.89e-11 ***
## x5           3.681e-04  2.404e-04   1.531 0.125876    
## x6          -4.350e-04  4.841e-04  -0.899 0.368928    
## x7           1.162e-02  5.212e-04  22.298  < 2e-16 ***
## x8           5.058e-04  1.216e-04   4.159 3.26e-05 ***
## x9           3.042e-03  2.697e-04  11.277  < 2e-16 ***
## x10          1.697e-03  2.522e-04   6.728 1.92e-11 ***
## x11          2.168e+05  6.030e+04   3.595 0.000327 ***
## x12          2.833e-04  1.534e-04   1.848 0.064727 .  
## x13          1.437e-04  6.122e-05   2.347 0.018940 *  
## x14         -1.507e-04  2.645e-04  -0.570 0.568935    
## x15          4.930e-05  2.509e-04   0.197 0.844209    
## x16          1.021e-03  1.746e-04   5.849 5.26e-09 ***
## x17          1.888e-03  2.657e-04   7.106 1.36e-12 ***
## x19          3.021e-05  1.348e-04   0.224 0.822685    
## x20         -7.862e-04  9.437e-04  -0.833 0.404837    
## x21          1.084e-04  3.448e-05   3.143 0.001681 ** 
## x22         -5.405e-04  2.825e-04  -1.913 0.055802 .  
## x23          4.950e-04  2.690e-04   1.840 0.065838 .  
## stat1       -1.969e-04  2.024e-04  -0.973 0.330548    
## stat2        1.577e-04  2.009e-04   0.785 0.432425    
## stat3        4.829e-04  2.026e-04   2.383 0.017193 *  
## stat4       -5.465e-04  2.041e-04  -2.678 0.007434 ** 
## stat5       -2.116e-04  2.028e-04  -1.043 0.296854    
## stat6       -1.450e-04  2.027e-04  -0.716 0.474318    
## stat7       -9.444e-05  2.020e-04  -0.468 0.640110    
## stat8        1.058e-04  2.021e-04   0.524 0.600633    
## stat9       -3.038e-04  2.021e-04  -1.503 0.132819    
## stat10      -1.673e-04  2.024e-04  -0.827 0.408333    
## stat11      -1.519e-04  2.036e-04  -0.746 0.455811    
## stat12       2.748e-04  2.017e-04   1.363 0.173082    
## stat13      -6.344e-04  2.010e-04  -3.156 0.001608 ** 
## stat14      -9.291e-04  2.009e-04  -4.624 3.85e-06 ***
## stat15      -4.833e-04  2.007e-04  -2.408 0.016094 *  
## stat16      -3.055e-05  2.015e-04  -0.152 0.879510    
## stat17       5.819e-05  2.009e-04   0.290 0.772084    
## stat18      -3.256e-04  2.021e-04  -1.611 0.107298    
## stat19      -5.567e-05  2.020e-04  -0.276 0.782908    
## stat20       3.081e-04  2.031e-04   1.517 0.129430    
## stat21       7.033e-05  2.028e-04   0.347 0.728740    
## stat22      -1.803e-04  2.032e-04  -0.887 0.374943    
## stat23       5.277e-04  2.023e-04   2.609 0.009108 ** 
## stat24      -4.754e-04  2.033e-04  -2.339 0.019380 *  
## stat25      -4.074e-04  2.016e-04  -2.021 0.043330 *  
## stat26      -4.563e-04  2.021e-04  -2.258 0.023987 *  
## stat27       2.053e-05  2.033e-04   0.101 0.919565    
## stat28      -9.001e-07  2.027e-04  -0.004 0.996457    
## stat29       1.711e-04  2.038e-04   0.840 0.401068    
## stat30       9.739e-05  2.029e-04   0.480 0.631216    
## stat31       1.248e-04  2.036e-04   0.613 0.539965    
## stat32       2.258e-04  2.032e-04   1.111 0.266524    
## stat33      -4.061e-04  2.023e-04  -2.007 0.044782 *  
## stat34       2.009e-04  2.019e-04   0.995 0.319766    
## stat35      -5.055e-04  2.020e-04  -2.502 0.012365 *  
## stat36      -1.727e-04  2.009e-04  -0.860 0.389954    
## stat37      -7.459e-05  2.042e-04  -0.365 0.714877    
## stat38       5.699e-04  2.012e-04   2.832 0.004647 ** 
## stat39      -2.856e-04  1.998e-04  -1.429 0.152967    
## stat40      -6.039e-05  2.026e-04  -0.298 0.765707    
## stat41      -6.258e-04  2.005e-04  -3.121 0.001814 ** 
## stat42      -1.348e-04  2.012e-04  -0.670 0.502989    
## stat43      -1.203e-04  2.032e-04  -0.592 0.553689    
## stat44      -2.554e-07  2.026e-04  -0.001 0.998994    
## stat45      -2.444e-04  2.024e-04  -1.207 0.227357    
## stat46       4.196e-05  2.024e-04   0.207 0.835776    
## stat47       4.968e-05  2.044e-04   0.243 0.807946    
## stat48       3.212e-04  2.022e-04   1.589 0.112155    
## stat49      -2.172e-05  2.019e-04  -0.108 0.914355    
## stat50       1.866e-04  1.999e-04   0.934 0.350530    
## stat51       1.251e-04  2.021e-04   0.619 0.535916    
## stat52       2.149e-04  2.027e-04   1.060 0.288992    
## stat53      -1.514e-04  2.042e-04  -0.742 0.458257    
## stat54      -2.520e-04  2.037e-04  -1.237 0.216170    
## stat55       9.376e-05  2.006e-04   0.467 0.640165    
## stat56       1.495e-04  2.026e-04   0.738 0.460753    
## stat57      -2.171e-04  2.006e-04  -1.082 0.279390    
## stat58      -4.828e-05  2.014e-04  -0.240 0.810583    
## stat59       1.827e-04  2.018e-04   0.905 0.365417    
## stat60       4.344e-04  2.024e-04   2.146 0.031926 *  
## stat61      -5.697e-05  2.028e-04  -0.281 0.778801    
## stat62      -3.749e-04  2.016e-04  -1.859 0.063040 .  
## stat63      -1.158e-04  2.022e-04  -0.573 0.566924    
## stat64       1.159e-06  2.011e-04   0.006 0.995403    
## stat65      -4.208e-04  2.045e-04  -2.058 0.039685 *  
## stat66      -3.552e-05  2.044e-04  -0.174 0.862029    
## stat67       3.680e-04  2.035e-04   1.809 0.070558 .  
## stat68      -2.074e-04  2.032e-04  -1.021 0.307309    
## stat69      -1.860e-04  2.031e-04  -0.916 0.359812    
## stat70       3.842e-04  2.011e-04   1.910 0.056199 .  
## stat71       1.170e-04  2.015e-04   0.581 0.561453    
## stat72       2.540e-05  2.031e-04   0.125 0.900490    
## stat73       3.495e-04  2.037e-04   1.716 0.086233 .  
## stat74       2.305e-04  2.030e-04   1.135 0.256427    
## stat75      -9.731e-06  2.044e-04  -0.048 0.962032    
## stat76       5.054e-05  2.031e-04   0.249 0.803475    
## stat77      -1.055e-04  2.022e-04  -0.522 0.601826    
## stat78      -3.665e-04  2.012e-04  -1.821 0.068629 .  
## stat79      -1.757e-05  2.011e-04  -0.087 0.930358    
## stat80       2.436e-04  2.024e-04   1.204 0.228752    
## stat81       1.247e-04  2.030e-04   0.614 0.539058    
## stat82      -2.036e-05  2.024e-04  -0.101 0.919862    
## stat83       1.422e-04  2.020e-04   0.704 0.481610    
## stat84      -1.581e-04  2.023e-04  -0.782 0.434413    
## stat85      -2.017e-04  2.018e-04  -0.999 0.317666    
## stat86       5.429e-04  2.023e-04   2.684 0.007301 ** 
## stat87      -2.278e-04  2.033e-04  -1.121 0.262388    
## stat88      -6.315e-05  2.010e-04  -0.314 0.753354    
## stat89      -4.190e-05  2.007e-04  -0.209 0.834618    
## stat90      -4.168e-04  2.029e-04  -2.055 0.039970 *  
## stat91      -4.513e-04  2.010e-04  -2.245 0.024804 *  
## stat92       9.969e-06  2.023e-04   0.049 0.960701    
## stat93       1.250e-05  2.047e-04   0.061 0.951289    
## stat94       2.366e-04  2.009e-04   1.178 0.238958    
## stat95       1.654e-04  2.034e-04   0.813 0.416096    
## stat96      -2.403e-04  2.012e-04  -1.195 0.232322    
## stat97       3.326e-05  2.011e-04   0.165 0.868651    
## stat98       3.394e-03  1.997e-04  16.990  < 2e-16 ***
## stat99       1.566e-04  2.034e-04   0.770 0.441340    
## stat100      5.538e-04  2.026e-04   2.733 0.006302 ** 
## stat101     -3.316e-05  2.044e-04  -0.162 0.871154    
## stat102      1.675e-04  2.034e-04   0.824 0.410127    
## stat103     -5.062e-05  2.040e-04  -0.248 0.804071    
## stat104     -1.259e-05  2.030e-04  -0.062 0.950571    
## stat105      3.880e-04  2.005e-04   1.935 0.053013 .  
## stat106     -8.101e-05  2.013e-04  -0.402 0.687381    
## stat107     -1.209e-04  2.021e-04  -0.598 0.549576    
## stat108     -1.702e-04  2.020e-04  -0.843 0.399474    
## stat109     -1.844e-04  2.014e-04  -0.916 0.359922    
## stat110     -3.418e-03  2.025e-04 -16.881  < 2e-16 ***
## stat111      3.945e-05  2.010e-04   0.196 0.844417    
## stat112     -6.677e-05  2.049e-04  -0.326 0.744554    
## stat113     -8.058e-05  2.025e-04  -0.398 0.690665    
## stat114      2.428e-04  2.018e-04   1.203 0.228979    
## stat115      2.145e-04  2.025e-04   1.059 0.289495    
## stat116      3.678e-04  2.036e-04   1.806 0.070903 .  
## stat117      1.865e-04  2.037e-04   0.915 0.360023    
## stat118     -1.061e-05  2.009e-04  -0.053 0.957861    
## stat119      2.434e-04  2.024e-04   1.202 0.229264    
## stat120     -3.393e-05  2.025e-04  -0.168 0.866951    
## stat121     -1.850e-04  2.032e-04  -0.911 0.362589    
## stat122     -9.087e-05  2.011e-04  -0.452 0.651335    
## stat123      1.432e-04  2.037e-04   0.703 0.482178    
## stat124     -2.001e-04  2.032e-04  -0.985 0.324758    
## stat125     -4.645e-05  2.032e-04  -0.229 0.819212    
## stat126      2.135e-04  2.016e-04   1.059 0.289809    
## stat127      8.548e-05  2.006e-04   0.426 0.670073    
## stat128     -3.362e-04  2.016e-04  -1.667 0.095558 .  
## stat129      5.658e-05  2.007e-04   0.282 0.778061    
## stat130      2.695e-04  2.011e-04   1.340 0.180210    
## stat131      3.673e-05  2.020e-04   0.182 0.855707    
## stat132     -1.013e-04  2.009e-04  -0.504 0.614039    
## stat133      4.747e-05  2.021e-04   0.235 0.814332    
## stat134     -1.031e-04  2.007e-04  -0.514 0.607371    
## stat135     -9.950e-05  2.030e-04  -0.490 0.624098    
## stat136     -4.771e-04  2.035e-04  -2.344 0.019097 *  
## stat137      1.371e-04  2.000e-04   0.685 0.493226    
## stat138     -1.019e-04  2.013e-04  -0.506 0.612851    
## stat139     -5.271e-05  2.035e-04  -0.259 0.795626    
## stat140      2.777e-04  2.005e-04   1.385 0.166130    
## stat141      4.438e-04  2.012e-04   2.206 0.027411 *  
## stat142      9.528e-05  2.040e-04   0.467 0.640452    
## stat143     -1.636e-05  2.028e-04  -0.081 0.935711    
## stat144      4.676e-04  2.011e-04   2.325 0.020090 *  
## stat145     -6.764e-06  2.051e-04  -0.033 0.973692    
## stat146     -7.726e-04  2.024e-04  -3.816 0.000137 ***
## stat147     -2.043e-04  2.039e-04  -1.002 0.316423    
## stat148     -2.628e-04  2.014e-04  -1.305 0.191996    
## stat149     -4.906e-04  2.034e-04  -2.412 0.015881 *  
## stat150     -2.339e-05  2.033e-04  -0.115 0.908411    
## stat151      2.439e-05  2.047e-04   0.119 0.905171    
## stat152     -9.612e-05  2.022e-04  -0.475 0.634569    
## stat153      3.326e-04  2.047e-04   1.625 0.104314    
## stat154      2.337e-05  2.037e-04   0.115 0.908648    
## stat155      2.869e-04  2.016e-04   1.423 0.154790    
## stat156      3.253e-04  2.037e-04   1.597 0.110313    
## stat157      1.091e-04  2.015e-04   0.542 0.588171    
## stat158      2.764e-05  2.051e-04   0.135 0.892782    
## stat159      2.493e-04  2.007e-04   1.242 0.214385    
## stat160      4.662e-05  2.040e-04   0.229 0.819223    
## stat161     -5.186e-05  2.028e-04  -0.256 0.798158    
## stat162      1.695e-04  1.998e-04   0.848 0.396326    
## stat163      1.241e-04  2.049e-04   0.606 0.544809    
## stat164     -1.805e-04  2.044e-04  -0.883 0.377147    
## stat165      8.224e-05  2.014e-04   0.408 0.683043    
## stat166     -2.284e-04  1.997e-04  -1.144 0.252631    
## stat167     -1.492e-04  2.016e-04  -0.740 0.459319    
## stat168     -3.093e-05  2.011e-04  -0.154 0.877762    
## stat169     -2.426e-04  2.033e-04  -1.193 0.232902    
## stat170      6.964e-05  2.015e-04   0.346 0.729656    
## stat171     -8.456e-05  2.050e-04  -0.413 0.679972    
## stat172      5.647e-04  2.005e-04   2.817 0.004868 ** 
## stat173     -3.167e-05  2.036e-04  -0.156 0.876411    
## stat174      3.106e-06  2.026e-04   0.015 0.987772    
## stat175     -4.993e-04  2.028e-04  -2.462 0.013844 *  
## stat176     -1.971e-04  2.027e-04  -0.972 0.330934    
## stat177     -4.236e-04  2.025e-04  -2.091 0.036536 *  
## stat178     -3.418e-05  2.043e-04  -0.167 0.867136    
## stat179      2.119e-04  2.024e-04   1.047 0.295155    
## stat180     -4.758e-04  2.016e-04  -2.361 0.018285 *  
## stat181      3.535e-04  2.025e-04   1.745 0.080966 .  
## stat182      1.388e-04  2.039e-04   0.681 0.496092    
## stat183      6.291e-05  2.023e-04   0.311 0.755798    
## stat184      3.568e-04  2.024e-04   1.763 0.077933 .  
## stat185     -3.456e-05  1.993e-04  -0.173 0.862363    
## stat186      1.472e-04  2.032e-04   0.725 0.468726    
## stat187     -2.581e-04  2.017e-04  -1.280 0.200651    
## stat188     -2.191e-05  2.022e-04  -0.108 0.913714    
## stat189     -2.893e-05  2.025e-04  -0.143 0.886360    
## stat190     -3.321e-04  2.030e-04  -1.636 0.101942    
## stat191     -4.327e-04  2.027e-04  -2.135 0.032820 *  
## stat192     -4.255e-06  2.041e-04  -0.021 0.983373    
## stat193     -1.323e-04  2.044e-04  -0.647 0.517416    
## stat194     -4.192e-05  2.020e-04  -0.207 0.835654    
## stat195      1.785e-04  2.014e-04   0.886 0.375539    
## stat196     -1.287e-04  2.049e-04  -0.628 0.530144    
## stat197      1.724e-05  2.007e-04   0.086 0.931560    
## stat198     -3.170e-04  2.029e-04  -1.562 0.118338    
## stat199      1.249e-04  2.010e-04   0.621 0.534479    
## stat200     -1.739e-04  1.993e-04  -0.873 0.382730    
## stat201      4.440e-06  2.026e-04   0.022 0.982520    
## stat202     -1.694e-04  2.061e-04  -0.822 0.411157    
## stat203      1.650e-04  2.016e-04   0.818 0.413145    
## stat204     -1.992e-04  2.012e-04  -0.990 0.322336    
## stat205      7.847e-05  2.011e-04   0.390 0.696387    
## stat206     -3.098e-04  2.046e-04  -1.514 0.130001    
## stat207      4.263e-04  2.028e-04   2.102 0.035603 *  
## stat208      2.016e-04  2.021e-04   0.998 0.318446    
## stat209     -5.689e-07  2.012e-04  -0.003 0.997744    
## stat210     -2.808e-04  2.029e-04  -1.384 0.166298    
## stat211     -4.572e-05  2.038e-04  -0.224 0.822500    
## stat212      1.117e-04  2.023e-04   0.552 0.580940    
## stat213     -2.776e-04  2.034e-04  -1.365 0.172341    
## stat214     -2.307e-04  2.018e-04  -1.143 0.252911    
## stat215     -1.129e-04  2.025e-04  -0.557 0.577312    
## stat216     -1.670e-04  2.023e-04  -0.826 0.409076    
## stat217      1.638e-04  2.026e-04   0.809 0.418726    
## x18.sqrt     2.558e-02  7.679e-04  33.311  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.02491 on 5051 degrees of freedom
## Multiple R-squared:  0.3734, Adjusted R-squared:  0.3437 
## F-statistic: 12.54 on 240 and 5051 DF,  p-value: < 2.2e-16
cd.full2 = plot.diagnostics(model.full2, data.train2)

## [1] "Number of data points that have Cook's D > 4/n: 265"
## [1] "Number of data points that have Cook's D > 1: 0"
# much more normal residuals than before. 
# Checking to see if distributions are different and if so whcih variables
# High Leverage Plot 
plotData = data.train %>% 
  rownames_to_column() %>%
  mutate(type=ifelse(rowname %in% high.cd,'High','Normal')) %>%
  dplyr::select(type,target=one_of(label.names))

ggplot(data=plotData, aes(x=type,y=target)) +
  geom_boxplot(fill='light blue',outlier.shape=NA) +
  scale_y_continuous(name="Target Variable Values",label=scales::comma_format(accuracy=.1)) +
  theme_light() +
  ggtitle('Distribution of High Leverage Points and Normal  Points')

# 2 sample t-tests

plotData = data.train %>% 
  rownames_to_column() %>%
  mutate(type=ifelse(rowname %in% high.cd,'High','Normal')) %>%
  dplyr::select(type,one_of(feature.names))

comp.test = lapply(dplyr::select(plotData, one_of(feature.names))
                   , function(x) t.test(x ~ plotData$type, var.equal = TRUE)) 

sig.comp = list.filter(comp.test, p.value < 0.05)
sapply(sig.comp, function(x) x[['p.value']])
##           x6        stat4       stat22       stat47       stat82       stat85       stat95       stat98      stat110 
## 4.711617e-02 6.946036e-03 4.924546e-02 1.995137e-02 1.000120e-02 2.362890e-02 4.571544e-02 2.161598e-06 3.971208e-04 
##      stat128      stat146      stat151      stat169      stat172      stat204     x18.sqrt 
## 9.211091e-03 6.623412e-03 1.023297e-02 1.972685e-02 3.262764e-02 2.931977e-02 3.083623e-02
mm = melt(plotData, id=c('type')) %>% filter(variable %in% names(sig.comp))

ggplot(mm,aes(x=type, y=value)) +
  geom_boxplot()+
  facet_wrap(~variable, ncol=5, scales = 'free_y') +
  scale_y_continuous(name="values",label=scales::comma_format(accuracy=.1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

# Distribution (box) Plots
mm = melt(plotData, id=c('type'))

ggplot(mm,aes(x=type, y=value)) +
  geom_boxplot()+
  facet_wrap(~variable, ncol=8, scales = 'free_y') +
  scale_y_continuous(name="values",label=scales::comma_format(accuracy=.1)) +
  ggtitle('Distribution of High Leverage Points and Normal Points')

Grand Means Model

model.null = lm(grand.mean.formula, data.train)
summary(model.null)
## 
## Call:
## lm(formula = grand.mean.formula, data = data.train)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.114839 -0.023677 -0.003183  0.020570  0.175063 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.0967148  0.0004799    4369   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.03586 on 5583 degrees of freedom

Variable Selection

Basic: http://www.stat.columbia.edu/~martin/W2024/R10.pdf Cross Validation + Other Metrics: http://www.sthda.com/english/articles/37-model-selection-essentials-in-r/154-stepwise-regression-essentials-in-r/

Forward Selection with CV

Train

if (algo.forward.caret == TRUE){
  set.seed(1)
  returned = train.caret.glmselect(formula = formula
                                   , data = data.train
                                   , method = "leapForward"
                                   , feature.names = feature.names)
  model.forward = returned$model
  id = returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 13 on full training set
## [1] "All models results"
##     nvmax       RMSE  Rsquared        MAE       RMSESD RsquaredSD        MAESD
## 1       1 0.03385458 0.1096104 0.02634130 0.0008295538 0.02478797 0.0004982521
## 2       2 0.03308589 0.1497922 0.02574921 0.0010157625 0.03247206 0.0005889199
## 3       3 0.03249415 0.1799293 0.02517980 0.0010498661 0.03378978 0.0006072363
## 4       4 0.03196165 0.2063363 0.02447506 0.0009342472 0.03165626 0.0004690114
## 5       5 0.03171189 0.2187152 0.02429046 0.0009442026 0.03113072 0.0005214975
## 6       6 0.03161733 0.2231860 0.02421222 0.0009393708 0.02922561 0.0005173032
## 7       7 0.03160116 0.2241139 0.02422636 0.0009247879 0.02858110 0.0004891560
## 8       8 0.03148146 0.2300943 0.02414098 0.0009244857 0.02881080 0.0004732991
## 9       9 0.03148587 0.2299594 0.02416144 0.0009477416 0.02913802 0.0004982399
## 10     10 0.03148711 0.2298547 0.02415440 0.0009739512 0.02987084 0.0005053064
## 11     11 0.03148089 0.2301187 0.02416120 0.0009953110 0.02990456 0.0005364930
## 12     12 0.03148470 0.2299522 0.02416349 0.0009990068 0.02944639 0.0005495697
## 13     13 0.03145749 0.2311967 0.02414219 0.0009791909 0.02841144 0.0005384081
## 14     14 0.03147437 0.2303786 0.02416961 0.0009806357 0.02822536 0.0005623535
## 15     15 0.03146569 0.2307427 0.02417507 0.0009936339 0.02783595 0.0005714702
## 16     16 0.03148991 0.2296581 0.02420039 0.0009779886 0.02712579 0.0005488794
## 17     17 0.03150643 0.2289046 0.02421673 0.0009893667 0.02727567 0.0005628079
## 18     18 0.03149982 0.2292535 0.02420143 0.0009945389 0.02708799 0.0005689028
## 19     19 0.03148965 0.2297416 0.02418990 0.0010067669 0.02740872 0.0005775336
## 20     20 0.03148734 0.2299763 0.02421181 0.0010281631 0.02837203 0.0006011993
## 21     21 0.03149039 0.2298256 0.02421445 0.0010305432 0.02825896 0.0005965495
## 22     22 0.03149562 0.2296796 0.02421640 0.0010353947 0.02841205 0.0006094047
## 23     23 0.03151241 0.2288523 0.02422678 0.0010342105 0.02796473 0.0006113076
## 24     24 0.03150123 0.2293981 0.02421464 0.0010382557 0.02814206 0.0006112747
## 25     25 0.03150030 0.2294055 0.02421937 0.0010259090 0.02723670 0.0006036940
## 26     26 0.03148503 0.2301484 0.02421383 0.0010383896 0.02784105 0.0006018764
## 27     27 0.03148870 0.2299910 0.02421166 0.0010409668 0.02805096 0.0006223322
## 28     28 0.03149164 0.2298689 0.02421446 0.0010475865 0.02817762 0.0006217105
## 29     29 0.03150762 0.2291131 0.02423181 0.0010454282 0.02854621 0.0006274450
## 30     30 0.03150194 0.2294108 0.02421748 0.0010356148 0.02873647 0.0006150540
## 31     31 0.03152398 0.2284308 0.02422189 0.0010229984 0.02839326 0.0005956601
## 32     32 0.03152726 0.2282500 0.02423046 0.0010176376 0.02821051 0.0005934954
## 33     33 0.03152665 0.2283797 0.02423569 0.0010279635 0.02893915 0.0006031575
## 34     34 0.03151796 0.2287626 0.02422206 0.0010203039 0.02871491 0.0005815572
## 35     35 0.03152188 0.2286335 0.02421730 0.0010328349 0.02996362 0.0005969662
## 36     36 0.03151609 0.2289389 0.02420336 0.0010522874 0.03060404 0.0005997562
## 37     37 0.03153971 0.2278691 0.02421355 0.0010513591 0.03040757 0.0006061880
## 38     38 0.03152108 0.2287407 0.02420077 0.0010539417 0.03086271 0.0006036952
## 39     39 0.03154157 0.2278284 0.02421705 0.0010635866 0.03097179 0.0006032555
## 40     40 0.03154767 0.2275729 0.02422286 0.0010903027 0.03158575 0.0006327650
## 41     41 0.03156603 0.2266967 0.02423231 0.0010887132 0.03168584 0.0006300055
## 42     42 0.03158288 0.2259341 0.02425113 0.0010921414 0.03163684 0.0006311692
## 43     43 0.03159212 0.2255308 0.02425169 0.0010906894 0.03155566 0.0006225412
## 44     44 0.03160760 0.2248204 0.02426772 0.0010926506 0.03128084 0.0006181163
## 45     45 0.03161962 0.2243067 0.02427944 0.0011005990 0.03134491 0.0006129422
## 46     46 0.03163727 0.2235394 0.02428605 0.0010961284 0.03125069 0.0006144160
## 47     47 0.03165256 0.2228692 0.02429918 0.0010894147 0.03069431 0.0006165416
## 48     48 0.03168448 0.2214048 0.02433259 0.0010861216 0.03052200 0.0006092059
## 49     49 0.03168228 0.2215498 0.02433165 0.0011059340 0.03156581 0.0006256120
## 50     50 0.03169873 0.2207926 0.02433367 0.0011040089 0.03138761 0.0006304474
## 51     51 0.03170625 0.2205027 0.02434531 0.0011125692 0.03152805 0.0006261585
## 52     52 0.03172888 0.2195189 0.02436345 0.0011210757 0.03190875 0.0006326619
## 53     53 0.03173833 0.2191255 0.02437395 0.0011227931 0.03201558 0.0006229525
## 54     54 0.03173091 0.2195036 0.02437030 0.0011309028 0.03231791 0.0006414506
## 55     55 0.03175009 0.2186384 0.02437952 0.0011207532 0.03211231 0.0006263079
## 56     56 0.03177631 0.2174751 0.02440000 0.0011298996 0.03218819 0.0006298855
## 57     57 0.03179105 0.2168322 0.02441954 0.0011327483 0.03239828 0.0006311695
## 58     58 0.03179543 0.2166167 0.02441551 0.0011226956 0.03194527 0.0006248217
## 59     59 0.03179682 0.2165954 0.02441419 0.0011132396 0.03136437 0.0006204672
## 60     60 0.03180899 0.2160667 0.02442704 0.0011158744 0.03151877 0.0006271045
## 61     61 0.03181399 0.2158287 0.02443812 0.0011170225 0.03166624 0.0006187309
## 62     62 0.03182950 0.2151802 0.02445959 0.0011207396 0.03164840 0.0006338023
## 63     63 0.03183159 0.2150761 0.02446088 0.0011141326 0.03147101 0.0006301800
## 64     64 0.03183710 0.2148396 0.02446407 0.0011021557 0.03087915 0.0006181052
## 65     65 0.03184674 0.2144377 0.02447586 0.0010948741 0.03065578 0.0006072468
## 66     66 0.03184914 0.2143324 0.02447817 0.0010910051 0.03040860 0.0006095775
## 67     67 0.03184564 0.2145554 0.02447919 0.0011023150 0.03091561 0.0006222728
## 68     68 0.03184676 0.2145530 0.02448316 0.0011039506 0.03104141 0.0006164407
## 69     69 0.03184237 0.2147087 0.02448071 0.0010965171 0.03086172 0.0006175814
## 70     70 0.03183447 0.2151060 0.02447435 0.0011013035 0.03068614 0.0006185848
## 71     71 0.03183614 0.2150547 0.02447275 0.0011129815 0.03074796 0.0006288891
## 72     72 0.03184422 0.2147404 0.02448480 0.0011134711 0.03095384 0.0006268541
## 73     73 0.03186211 0.2139748 0.02450351 0.0011112713 0.03063664 0.0006279500
## 74     74 0.03186912 0.2137213 0.02451864 0.0011178984 0.03088926 0.0006297621
## 75     75 0.03188033 0.2133161 0.02452957 0.0011354314 0.03190261 0.0006408760
## 76     76 0.03189291 0.2127628 0.02454267 0.0011419099 0.03193809 0.0006482512
## 77     77 0.03189488 0.2126862 0.02454392 0.0011294984 0.03129447 0.0006364631
## 78     78 0.03190201 0.2123742 0.02454723 0.0011381035 0.03155580 0.0006367078
## 79     79 0.03189664 0.2126411 0.02453894 0.0011454135 0.03185436 0.0006396544
## 80     80 0.03190974 0.2120906 0.02454493 0.0011522273 0.03222378 0.0006508030
## 81     81 0.03191369 0.2119374 0.02454340 0.0011595066 0.03230649 0.0006553162
## 82     82 0.03191874 0.2117034 0.02454972 0.0011578958 0.03201588 0.0006556645
## 83     83 0.03192325 0.2115324 0.02455813 0.0011465857 0.03170870 0.0006432353
## 84     84 0.03193218 0.2111023 0.02455917 0.0011379002 0.03128737 0.0006367204
## 85     85 0.03194500 0.2105574 0.02457105 0.0011367092 0.03109805 0.0006391154
## 86     86 0.03194854 0.2104327 0.02457569 0.0011424890 0.03130890 0.0006436462
## 87     87 0.03195029 0.2103709 0.02457721 0.0011355757 0.03104367 0.0006398209
## 88     88 0.03194445 0.2106402 0.02457548 0.0011282636 0.03081283 0.0006468699
## 89     89 0.03195891 0.2100413 0.02458663 0.0011272111 0.03068918 0.0006430510
## 90     90 0.03195401 0.2102664 0.02457459 0.0011397955 0.03124134 0.0006584990
## 91     91 0.03196393 0.2098952 0.02457780 0.0011390448 0.03124107 0.0006572308
## 92     92 0.03197206 0.2095377 0.02458189 0.0011316200 0.03099955 0.0006560113
## 93     93 0.03198363 0.2091021 0.02458806 0.0011300113 0.03087385 0.0006599111
## 94     94 0.03198373 0.2091618 0.02458885 0.0011258313 0.03073495 0.0006544332
## 95     95 0.03198670 0.2090266 0.02459468 0.0011248481 0.03083401 0.0006528552
## 96     96 0.03198783 0.2089614 0.02459973 0.0011251973 0.03087446 0.0006562168
## 97     97 0.03198701 0.2090226 0.02459331 0.0011293460 0.03119994 0.0006621649
## 98     98 0.03200057 0.2085114 0.02459744 0.0011333184 0.03124247 0.0006674357
## 99     99 0.03199983 0.2085464 0.02459378 0.0011410863 0.03154115 0.0006694835
## 100   100 0.03199238 0.2088706 0.02458552 0.0011424034 0.03146923 0.0006790156
## 101   101 0.03199740 0.2086912 0.02458649 0.0011506824 0.03185690 0.0006886233
## 102   102 0.03200290 0.2084651 0.02459492 0.0011503489 0.03177499 0.0006795861
## 103   103 0.03200293 0.2084787 0.02459540 0.0011499730 0.03185931 0.0006767545
## 104   104 0.03200354 0.2084457 0.02459592 0.0011451080 0.03151677 0.0006740884
## 105   105 0.03200601 0.2083357 0.02459645 0.0011443595 0.03151951 0.0006806581
## 106   106 0.03200619 0.2083268 0.02459972 0.0011402674 0.03140935 0.0006821767
## 107   107 0.03201305 0.2080327 0.02460193 0.0011316479 0.03112423 0.0006722950
## 108   108 0.03201378 0.2080112 0.02459436 0.0011285986 0.03112527 0.0006694736
## 109   109 0.03201712 0.2078442 0.02459618 0.0011291006 0.03116477 0.0006703776
## 110   110 0.03201495 0.2079198 0.02459509 0.0011301334 0.03123062 0.0006694449
## 111   111 0.03201965 0.2077687 0.02459770 0.0011378391 0.03158282 0.0006741437
## 112   112 0.03202471 0.2075414 0.02460269 0.0011457676 0.03191216 0.0006817749
## 113   113 0.03202648 0.2074941 0.02460188 0.0011489600 0.03206860 0.0006859216
## 114   114 0.03202953 0.2073788 0.02460383 0.0011459513 0.03205737 0.0006875130
## 115   115 0.03203220 0.2072700 0.02460418 0.0011413245 0.03178745 0.0006817709
## 116   116 0.03203835 0.2070403 0.02461206 0.0011478424 0.03196953 0.0006858353
## 117   117 0.03204683 0.2066827 0.02461840 0.0011425950 0.03168421 0.0006822265
## 118   118 0.03204732 0.2066729 0.02461901 0.0011407043 0.03150565 0.0006756305
## 119   119 0.03204971 0.2065697 0.02462174 0.0011438415 0.03168880 0.0006764938
## 120   120 0.03205113 0.2064778 0.02462514 0.0011429186 0.03164883 0.0006814383
## 121   121 0.03205458 0.2063379 0.02462764 0.0011347764 0.03136545 0.0006741159
## 122   122 0.03205379 0.2064090 0.02462672 0.0011419756 0.03174433 0.0006778833
## 123   123 0.03205280 0.2064513 0.02462466 0.0011409645 0.03165798 0.0006759197
## 124   124 0.03205283 0.2064593 0.02462716 0.0011424141 0.03165763 0.0006747770
## 125   125 0.03204920 0.2066213 0.02462665 0.0011383507 0.03140319 0.0006754891
## 126   126 0.03204892 0.2066330 0.02462417 0.0011359859 0.03137349 0.0006717698
## 127   127 0.03205096 0.2065761 0.02462625 0.0011334792 0.03124219 0.0006719293
## 128   128 0.03205645 0.2063443 0.02462976 0.0011300527 0.03101835 0.0006671213
## 129   129 0.03205879 0.2062567 0.02462930 0.0011301412 0.03111591 0.0006624874
## 130   130 0.03205753 0.2063390 0.02462912 0.0011302508 0.03110370 0.0006588343
## 131   131 0.03205997 0.2062352 0.02463451 0.0011241991 0.03079557 0.0006539486
## 132   132 0.03205856 0.2063145 0.02463140 0.0011217790 0.03083442 0.0006522461
## 133   133 0.03205798 0.2063390 0.02463891 0.0011160781 0.03054042 0.0006439798
## 134   134 0.03206229 0.2061494 0.02464216 0.0011084869 0.03030594 0.0006387108
## 135   135 0.03206697 0.2059970 0.02464547 0.0011174011 0.03077732 0.0006470535
## 136   136 0.03207088 0.2058289 0.02464527 0.0011136306 0.03050577 0.0006467751
## 137   137 0.03206913 0.2059006 0.02464778 0.0011162843 0.03058968 0.0006493711
## 138   138 0.03207566 0.2056283 0.02465507 0.0011155473 0.03050247 0.0006454265
## 139   139 0.03207207 0.2057647 0.02465389 0.0011098581 0.03032386 0.0006436001
## 140   140 0.03207425 0.2056548 0.02465289 0.0011060018 0.03012625 0.0006449472
## 141   141 0.03207457 0.2056452 0.02465251 0.0011009955 0.02985444 0.0006401691
## 142   142 0.03207430 0.2056877 0.02464867 0.0011019350 0.02992069 0.0006385963
## 143   143 0.03207330 0.2057283 0.02465038 0.0011014990 0.02994517 0.0006379622
## 144   144 0.03207869 0.2054839 0.02465693 0.0011012916 0.02978443 0.0006390892
## 145   145 0.03208088 0.2053861 0.02465703 0.0010998213 0.02974541 0.0006392672
## 146   146 0.03208617 0.2051485 0.02466174 0.0010894836 0.02946710 0.0006292576
## 147   147 0.03208623 0.2051495 0.02466201 0.0010854575 0.02932436 0.0006237219
## 148   148 0.03208837 0.2050509 0.02466133 0.0010846587 0.02937443 0.0006207047
## 149   149 0.03209227 0.2048878 0.02466114 0.0010827812 0.02937854 0.0006196246
## 150   150 0.03209235 0.2048965 0.02466248 0.0010859355 0.02961160 0.0006251434
## 151   151 0.03209245 0.2048823 0.02466068 0.0010828886 0.02959406 0.0006226528
## 152   152 0.03209325 0.2048546 0.02466501 0.0010802333 0.02953837 0.0006197364
## 153   153 0.03209090 0.2049578 0.02466355 0.0010855734 0.02969854 0.0006271767
## 154   154 0.03208952 0.2050289 0.02466287 0.0010884877 0.02982372 0.0006272511
## 155   155 0.03208979 0.2050039 0.02466090 0.0010894040 0.02978673 0.0006321751
## 156   156 0.03208433 0.2052513 0.02465435 0.0010910820 0.02980501 0.0006325861
## 157   157 0.03208190 0.2053705 0.02465184 0.0010909230 0.02993580 0.0006302241
## 158   158 0.03208610 0.2051970 0.02465539 0.0010869471 0.02978380 0.0006280394
## 159   159 0.03208514 0.2052309 0.02465376 0.0010891107 0.02983277 0.0006329842
## 160   160 0.03208289 0.2053362 0.02465299 0.0010927928 0.03004625 0.0006351613
## 161   161 0.03208513 0.2052390 0.02465399 0.0010980524 0.03025764 0.0006376889
## 162   162 0.03208405 0.2053007 0.02465445 0.0010976436 0.03018810 0.0006390775
## 163   163 0.03208477 0.2052721 0.02465539 0.0010965207 0.03017224 0.0006374807
## 164   164 0.03208412 0.2052951 0.02465331 0.0010972946 0.03017178 0.0006393013
## 165   165 0.03208396 0.2052955 0.02465313 0.0010918313 0.02998144 0.0006336088
## 166   166 0.03208676 0.2051612 0.02465231 0.0010935109 0.02993785 0.0006348755
## 167   167 0.03208535 0.2052329 0.02464830 0.0010959251 0.03007276 0.0006384679
## 168   168 0.03208659 0.2051887 0.02464848 0.0010984508 0.03011117 0.0006447972
## 169   169 0.03208937 0.2050761 0.02465251 0.0010998241 0.03015737 0.0006476004
## 170   170 0.03209079 0.2050040 0.02465524 0.0011001584 0.03011193 0.0006479421
## 171   171 0.03209271 0.2049418 0.02465569 0.0010978292 0.03005729 0.0006473847
## 172   172 0.03209099 0.2050176 0.02465418 0.0010973456 0.03002143 0.0006472052
## 173   173 0.03209149 0.2049835 0.02465410 0.0010967600 0.02991083 0.0006481944
## 174   174 0.03209094 0.2050143 0.02465250 0.0010976032 0.02997440 0.0006470378
## 175   175 0.03209169 0.2049752 0.02465102 0.0010980381 0.03003935 0.0006487678
## 176   176 0.03209213 0.2049613 0.02465105 0.0010974598 0.03002888 0.0006465534
## 177   177 0.03209028 0.2050431 0.02465094 0.0011005006 0.03020171 0.0006501054
## 178   178 0.03209304 0.2049288 0.02465458 0.0010984931 0.03013958 0.0006460770
## 179   179 0.03209457 0.2048692 0.02465697 0.0010981838 0.03014507 0.0006455129
## 180   180 0.03209880 0.2047040 0.02465897 0.0010989862 0.03025039 0.0006453711
## 181   181 0.03210165 0.2045977 0.02466056 0.0011017504 0.03040096 0.0006438785
## 182   182 0.03210273 0.2045482 0.02466128 0.0011014313 0.03031420 0.0006429578
## 183   183 0.03210347 0.2045247 0.02466139 0.0010988375 0.03026955 0.0006406877
## 184   184 0.03210589 0.2044271 0.02466273 0.0011036727 0.03043728 0.0006457711
## 185   185 0.03210597 0.2044257 0.02466181 0.0011037167 0.03040910 0.0006433408
## 186   186 0.03210420 0.2044988 0.02465811 0.0011051202 0.03050014 0.0006454520
## 187   187 0.03210761 0.2043523 0.02465953 0.0011051731 0.03045573 0.0006443201
## 188   188 0.03210787 0.2043398 0.02466063 0.0011032304 0.03033105 0.0006435954
## 189   189 0.03210751 0.2043498 0.02466154 0.0011040544 0.03029245 0.0006436060
## 190   190 0.03210826 0.2043220 0.02466446 0.0011035717 0.03030565 0.0006430880
## 191   191 0.03210572 0.2044321 0.02466306 0.0011056422 0.03038783 0.0006429226
## 192   192 0.03210889 0.2042929 0.02466381 0.0011051824 0.03039140 0.0006419025
## 193   193 0.03210620 0.2044115 0.02466275 0.0011069860 0.03042055 0.0006446823
## 194   194 0.03210662 0.2044012 0.02466316 0.0011045390 0.03039443 0.0006426465
## 195   195 0.03210861 0.2043164 0.02466373 0.0011033651 0.03033653 0.0006406348
## 196   196 0.03210911 0.2042810 0.02466371 0.0011022041 0.03027123 0.0006392670
## 197   197 0.03211217 0.2041458 0.02466584 0.0011026077 0.03028297 0.0006385982
## 198   198 0.03211090 0.2041969 0.02466462 0.0011029133 0.03028177 0.0006387208
## 199   199 0.03211312 0.2041006 0.02466644 0.0011029932 0.03025385 0.0006398724
## 200   200 0.03211128 0.2041760 0.02466705 0.0011031607 0.03021799 0.0006397630
## 201   201 0.03211183 0.2041562 0.02466698 0.0011036281 0.03023200 0.0006380779
## 202   202 0.03211063 0.2042118 0.02466758 0.0011033300 0.03021874 0.0006381737
## 203   203 0.03211102 0.2041871 0.02466820 0.0011034352 0.03016556 0.0006388333
## 204   204 0.03210900 0.2042772 0.02466674 0.0011043321 0.03024071 0.0006381471
## 205   205 0.03210899 0.2042789 0.02466674 0.0011040811 0.03019419 0.0006379155
## 206   206 0.03210925 0.2042731 0.02466675 0.0011048055 0.03021223 0.0006396567
## 207   207 0.03210745 0.2043563 0.02466516 0.0011046420 0.03023213 0.0006407044
## 208   208 0.03210729 0.2043615 0.02466463 0.0011054014 0.03024237 0.0006412264
## 209   209 0.03210924 0.2042686 0.02466627 0.0011054587 0.03023173 0.0006415652
## 210   210 0.03210940 0.2042757 0.02466676 0.0011074000 0.03032859 0.0006436772
## 211   211 0.03210883 0.2042997 0.02466565 0.0011078063 0.03033795 0.0006444732
## 212   212 0.03210970 0.2042682 0.02466706 0.0011075618 0.03033605 0.0006448931
## 213   213 0.03210888 0.2042934 0.02466580 0.0011065718 0.03030064 0.0006435122
## 214   214 0.03210762 0.2043480 0.02466444 0.0011066733 0.03028133 0.0006446942
## 215   215 0.03210922 0.2042785 0.02466503 0.0011074218 0.03028543 0.0006460986
## 216   216 0.03211078 0.2042151 0.02466665 0.0011067384 0.03026335 0.0006449334
## 217   217 0.03211109 0.2042072 0.02466717 0.0011092032 0.03036328 0.0006470520
## 218   218 0.03211111 0.2042070 0.02466656 0.0011089075 0.03033337 0.0006467482
## 219   219 0.03211051 0.2042295 0.02466618 0.0011083204 0.03031500 0.0006466827
## 220   220 0.03211076 0.2042154 0.02466654 0.0011077153 0.03028606 0.0006450397
## 221   221 0.03211001 0.2042474 0.02466652 0.0011080025 0.03032290 0.0006449498
## 222   222 0.03211061 0.2042185 0.02466710 0.0011079167 0.03033603 0.0006443065
## 223   223 0.03210918 0.2042743 0.02466632 0.0011072305 0.03030832 0.0006432578
## 224   224 0.03210988 0.2042473 0.02466727 0.0011072034 0.03032871 0.0006428482
## 225   225 0.03210996 0.2042470 0.02466728 0.0011070552 0.03031321 0.0006424427
## 226   226 0.03211009 0.2042409 0.02466757 0.0011074967 0.03033923 0.0006428487
## 227   227 0.03211021 0.2042403 0.02466787 0.0011081142 0.03039217 0.0006430474
## 228   228 0.03210967 0.2042650 0.02466704 0.0011083854 0.03039150 0.0006432076
## 229   229 0.03210983 0.2042598 0.02466729 0.0011091435 0.03042026 0.0006439666
## 230   230 0.03210963 0.2042692 0.02466754 0.0011094325 0.03042553 0.0006438673
## 231   231 0.03210959 0.2042718 0.02466768 0.0011091845 0.03042081 0.0006433408
## 232   232 0.03211034 0.2042399 0.02466868 0.0011088124 0.03040087 0.0006427493
## 233   233 0.03211031 0.2042421 0.02466841 0.0011094963 0.03043202 0.0006434366
## 234   234 0.03211014 0.2042491 0.02466836 0.0011092755 0.03042107 0.0006430091
## 235   235 0.03211022 0.2042449 0.02466844 0.0011091303 0.03041803 0.0006434017
## 236   236 0.03210982 0.2042631 0.02466832 0.0011092561 0.03042793 0.0006437501
## 237   237 0.03210997 0.2042566 0.02466848 0.0011088841 0.03040869 0.0006432413
## 238   238 0.03210991 0.2042592 0.02466824 0.0011087237 0.03040023 0.0006431995
## 239   239 0.03210989 0.2042603 0.02466821 0.0011087423 0.03039855 0.0006432065
## 240   240 0.03210993 0.2042585 0.02466824 0.0011087027 0.03039707 0.0006431900
## [1] "Best Model"
##    nvmax
## 13    13

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  1.972681e+00  1.956867e+00  1.988495e+00
## x4          -4.607173e-05 -6.334938e-05 -2.879408e-05
## x7           1.080812e-02  9.583611e-03  1.203263e-02
## x8           4.784756e-04  1.929724e-04  7.639787e-04
## x9           3.106132e-03  2.470605e-03  3.741659e-03
## x10          1.140634e-03  5.481000e-04  1.733168e-03
## x11          2.032330e+05  6.180148e+04  3.446645e+05
## x16          1.063749e-03  6.532151e-04  1.474284e-03
## x17          1.860411e-03  1.237166e-03  2.483656e-03
## stat14      -7.371136e-04 -1.207823e-03 -2.664039e-04
## stat41      -7.435917e-04 -1.216279e-03 -2.709043e-04
## stat98       3.577314e-03  3.108078e-03  4.046550e-03
## stat110     -3.392311e-03 -3.868431e-03 -2.916191e-03
## x18.sqrt     2.586108e-02  2.405283e-02  2.766932e-02

Test

if (algo.forward.caret == TRUE){
    test.model(model=model.forward, test=data.test
             ,method = 'leapForward',subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.039   2.084   2.097   2.097   2.109   2.145 
## [1] "leapForward  Test MSE: 0.00104530808325543"

Backward Elimination with CV

Train

if (algo.backward.caret == TRUE){
  set.seed(1)
  returned = train.caret.glmselect(formula = formula
                                   ,data =  data.train
                                   ,method = "leapBackward"
                                   ,feature.names =  feature.names)
  model.backward = returned$model
  id = returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 13 on full training set
## [1] "All models results"
##     nvmax       RMSE  Rsquared        MAE       RMSESD RsquaredSD        MAESD
## 1       1 0.03385458 0.1096104 0.02634130 0.0008295538 0.02478797 0.0004982521
## 2       2 0.03308589 0.1497922 0.02574921 0.0010157625 0.03247206 0.0005889199
## 3       3 0.03249415 0.1799293 0.02517980 0.0010498661 0.03378978 0.0006072363
## 4       4 0.03196165 0.2063363 0.02447506 0.0009342472 0.03165626 0.0004690114
## 5       5 0.03171189 0.2187152 0.02429046 0.0009442026 0.03113072 0.0005214975
## 6       6 0.03161733 0.2231860 0.02421222 0.0009393708 0.02922561 0.0005173032
## 7       7 0.03160116 0.2241139 0.02422636 0.0009247879 0.02858110 0.0004891560
## 8       8 0.03148146 0.2300943 0.02414098 0.0009244857 0.02881080 0.0004732991
## 9       9 0.03148587 0.2299594 0.02416144 0.0009477416 0.02913802 0.0004982399
## 10     10 0.03148711 0.2298547 0.02415440 0.0009739512 0.02987084 0.0005053064
## 11     11 0.03148089 0.2301187 0.02416120 0.0009953110 0.02990456 0.0005364930
## 12     12 0.03148470 0.2299522 0.02416349 0.0009990068 0.02944639 0.0005495697
## 13     13 0.03145749 0.2311967 0.02414219 0.0009791909 0.02841144 0.0005384081
## 14     14 0.03147437 0.2303786 0.02416961 0.0009806357 0.02822536 0.0005623535
## 15     15 0.03146569 0.2307427 0.02417507 0.0009936339 0.02783595 0.0005714702
## 16     16 0.03148991 0.2296581 0.02420039 0.0009779886 0.02712579 0.0005488794
## 17     17 0.03150643 0.2289046 0.02421673 0.0009893667 0.02727567 0.0005628079
## 18     18 0.03149150 0.2296509 0.02419585 0.0010119458 0.02777386 0.0005791125
## 19     19 0.03149602 0.2294674 0.02421031 0.0010101346 0.02752942 0.0005716861
## 20     20 0.03149150 0.2297889 0.02421325 0.0010355425 0.02848300 0.0006030018
## 21     21 0.03149346 0.2297230 0.02421668 0.0010347906 0.02832849 0.0005992109
## 22     22 0.03148985 0.2299577 0.02420545 0.0010317694 0.02816307 0.0005935233
## 23     23 0.03150560 0.2291566 0.02421693 0.0010302122 0.02771093 0.0005975507
## 24     24 0.03150123 0.2293981 0.02421464 0.0010382557 0.02814206 0.0006112747
## 25     25 0.03150100 0.2293860 0.02422176 0.0010254312 0.02723912 0.0006009309
## 26     26 0.03149183 0.2298245 0.02421489 0.0010347978 0.02786493 0.0006039257
## 27     27 0.03150301 0.2293084 0.02421839 0.0010322024 0.02814775 0.0006183281
## 28     28 0.03151177 0.2288883 0.02423062 0.0010376854 0.02836636 0.0006076962
## 29     29 0.03152629 0.2282336 0.02424962 0.0010350366 0.02874914 0.0006113150
## 30     30 0.03150462 0.2292796 0.02422666 0.0010333894 0.02884529 0.0006052159
## 31     31 0.03152222 0.2285141 0.02422667 0.0010231788 0.02850773 0.0005901465
## 32     32 0.03152166 0.2285409 0.02422637 0.0010202863 0.02803710 0.0005907502
## 33     33 0.03152680 0.2283914 0.02423341 0.0010282465 0.02891453 0.0005996862
## 34     34 0.03152165 0.2286289 0.02422029 0.0010272815 0.02899737 0.0005787631
## 35     35 0.03152216 0.2286122 0.02421592 0.0010333533 0.03000693 0.0005948846
## 36     36 0.03151619 0.2289292 0.02420485 0.0010524680 0.03062424 0.0006020601
## 37     37 0.03153723 0.2280375 0.02421334 0.0010653592 0.03092351 0.0006154587
## 38     38 0.03153305 0.2282179 0.02421116 0.0010638597 0.03138850 0.0006098035
## 39     39 0.03154750 0.2275818 0.02422317 0.0010763512 0.03139309 0.0006081373
## 40     40 0.03155781 0.2271261 0.02423390 0.0010932059 0.03186254 0.0006352395
## 41     41 0.03157039 0.2265291 0.02424142 0.0010873072 0.03168376 0.0006294560
## 42     42 0.03158852 0.2257109 0.02425656 0.0010930039 0.03180567 0.0006343242
## 43     43 0.03158944 0.2256653 0.02425314 0.0010937785 0.03161029 0.0006234741
## 44     44 0.03161430 0.2245175 0.02427365 0.0010905645 0.03114733 0.0006186374
## 45     45 0.03162132 0.2242148 0.02427171 0.0011006900 0.03111844 0.0006205233
## 46     46 0.03163449 0.2236242 0.02427997 0.0010925258 0.03107534 0.0006140710
## 47     47 0.03166233 0.2223907 0.02430563 0.0010785909 0.03055982 0.0006076598
## 48     48 0.03167535 0.2218009 0.02432416 0.0010819878 0.03071511 0.0006062742
## 49     49 0.03168743 0.2212780 0.02433099 0.0010958227 0.03117002 0.0006127448
## 50     50 0.03169790 0.2208193 0.02433761 0.0011007041 0.03143607 0.0006122051
## 51     51 0.03170385 0.2205859 0.02434684 0.0011148054 0.03176271 0.0006221330
## 52     52 0.03171206 0.2202321 0.02435358 0.0011174232 0.03201158 0.0006302951
## 53     53 0.03171275 0.2202419 0.02434438 0.0011190832 0.03228068 0.0006214280
## 54     54 0.03171942 0.2199857 0.02434491 0.0011195699 0.03200830 0.0006272990
## 55     55 0.03175177 0.2185689 0.02437405 0.0011249799 0.03205779 0.0006227720
## 56     56 0.03176537 0.2179661 0.02439238 0.0011324860 0.03260363 0.0006260234
## 57     57 0.03177822 0.2174000 0.02440121 0.0011261809 0.03236530 0.0006288714
## 58     58 0.03178045 0.2172998 0.02440080 0.0011162237 0.03203053 0.0006184818
## 59     59 0.03179325 0.2167466 0.02441368 0.0011150198 0.03146522 0.0006189895
## 60     60 0.03180388 0.2162887 0.02442668 0.0011220389 0.03164539 0.0006329970
## 61     61 0.03181178 0.2159335 0.02444023 0.0011229509 0.03175014 0.0006380280
## 62     62 0.03182960 0.2151122 0.02445188 0.0011089964 0.03143470 0.0006256259
## 63     63 0.03183547 0.2148748 0.02445908 0.0010986048 0.03100102 0.0006155681
## 64     64 0.03183843 0.2148068 0.02446634 0.0010977924 0.03068568 0.0006170278
## 65     65 0.03183741 0.2148634 0.02447038 0.0010925323 0.03044880 0.0006079075
## 66     66 0.03184972 0.2143235 0.02447909 0.0010950967 0.03051703 0.0006122068
## 67     67 0.03185145 0.2142726 0.02447925 0.0010948184 0.03048138 0.0006139235
## 68     68 0.03186226 0.2138494 0.02448803 0.0011039317 0.03066073 0.0006143266
## 69     69 0.03185310 0.2142871 0.02447907 0.0011164624 0.03095158 0.0006225425
## 70     70 0.03184253 0.2147749 0.02447324 0.0011100205 0.03066794 0.0006184337
## 71     71 0.03183982 0.2148468 0.02448019 0.0011046857 0.03037938 0.0006172816
## 72     72 0.03183951 0.2149365 0.02448457 0.0011123471 0.03114095 0.0006249679
## 73     73 0.03186126 0.2140233 0.02451008 0.0011240563 0.03150107 0.0006356698
## 74     74 0.03187077 0.2136788 0.02452242 0.0011336733 0.03187902 0.0006445357
## 75     75 0.03187397 0.2135744 0.02452600 0.0011269899 0.03173941 0.0006321105
## 76     76 0.03189134 0.2128197 0.02453792 0.0011312457 0.03155818 0.0006380670
## 77     77 0.03189578 0.2126348 0.02453994 0.0011233291 0.03111585 0.0006293408
## 78     78 0.03189367 0.2127511 0.02453910 0.0011326136 0.03134337 0.0006401240
## 79     79 0.03188700 0.2130560 0.02452915 0.0011322009 0.03117663 0.0006368514
## 80     80 0.03189215 0.2128610 0.02453203 0.0011315557 0.03109205 0.0006370118
## 81     81 0.03189711 0.2126117 0.02453410 0.0011220610 0.03106260 0.0006308790
## 82     82 0.03190666 0.2122045 0.02453909 0.0011203534 0.03080748 0.0006359173
## 83     83 0.03191382 0.2119136 0.02453932 0.0011280569 0.03099348 0.0006350279
## 84     84 0.03192120 0.2116056 0.02454643 0.0011330155 0.03109308 0.0006403650
## 85     85 0.03192608 0.2113545 0.02455771 0.0011206555 0.03036942 0.0006322890
## 86     86 0.03193713 0.2108934 0.02456953 0.0011147236 0.03040124 0.0006246585
## 87     87 0.03194515 0.2105549 0.02457501 0.0011184704 0.03029334 0.0006331645
## 88     88 0.03195161 0.2103293 0.02457984 0.0011177850 0.03034283 0.0006389102
## 89     89 0.03195398 0.2102553 0.02458073 0.0011276316 0.03057172 0.0006431748
## 90     90 0.03195710 0.2101202 0.02457268 0.0011301721 0.03057785 0.0006517665
## 91     91 0.03196854 0.2097160 0.02457845 0.0011418368 0.03117163 0.0006531160
## 92     92 0.03197633 0.2094152 0.02458434 0.0011383684 0.03120996 0.0006534548
## 93     93 0.03197903 0.2093464 0.02458929 0.0011361956 0.03103082 0.0006558597
## 94     94 0.03198123 0.2093176 0.02459254 0.0011381135 0.03107207 0.0006574648
## 95     95 0.03198888 0.2089592 0.02460281 0.0011353908 0.03115380 0.0006575974
## 96     96 0.03198110 0.2092615 0.02459781 0.0011322933 0.03116535 0.0006618244
## 97     97 0.03198395 0.2091719 0.02459428 0.0011320889 0.03132325 0.0006636691
## 98     98 0.03199242 0.2089000 0.02459594 0.0011412590 0.03184657 0.0006708533
## 99     99 0.03200024 0.2085274 0.02459575 0.0011408585 0.03154063 0.0006685708
## 100   100 0.03199579 0.2087076 0.02459364 0.0011424166 0.03163829 0.0006779534
## 101   101 0.03200704 0.2082228 0.02460036 0.0011467174 0.03177522 0.0006797074
## 102   102 0.03200882 0.2081417 0.02460712 0.0011403892 0.03159193 0.0006778734
## 103   103 0.03200486 0.2083381 0.02460558 0.0011407146 0.03166114 0.0006801245
## 104   104 0.03200627 0.2082858 0.02460077 0.0011373283 0.03146443 0.0006756929
## 105   105 0.03200221 0.2084937 0.02459636 0.0011439204 0.03170653 0.0006880109
## 106   106 0.03201272 0.2080742 0.02460015 0.0011419743 0.03152758 0.0006905733
## 107   107 0.03202531 0.2075159 0.02460630 0.0011352241 0.03137877 0.0006850509
## 108   108 0.03202664 0.2074637 0.02460652 0.0011338225 0.03147074 0.0006869862
## 109   109 0.03203332 0.2072115 0.02460458 0.0011374076 0.03165383 0.0006918002
## 110   110 0.03202907 0.2073875 0.02460349 0.0011383616 0.03164868 0.0006920873
## 111   111 0.03203395 0.2071944 0.02460799 0.0011406706 0.03171485 0.0006866740
## 112   112 0.03203251 0.2072707 0.02461149 0.0011508964 0.03201357 0.0006954180
## 113   113 0.03203444 0.2071493 0.02461243 0.0011475021 0.03174520 0.0006915712
## 114   114 0.03204342 0.2067747 0.02462032 0.0011448259 0.03159749 0.0006918321
## 115   115 0.03204435 0.2067141 0.02462258 0.0011416225 0.03153306 0.0006871036
## 116   116 0.03204664 0.2066266 0.02462444 0.0011480858 0.03174972 0.0006872334
## 117   117 0.03204745 0.2066152 0.02462266 0.0011451079 0.03163609 0.0006845313
## 118   118 0.03204978 0.2065293 0.02462060 0.0011394005 0.03150090 0.0006782393
## 119   119 0.03205151 0.2064905 0.02462147 0.0011443967 0.03167850 0.0006828976
## 120   120 0.03205198 0.2064815 0.02461950 0.0011463997 0.03189578 0.0006843878
## 121   121 0.03205210 0.2064996 0.02462163 0.0011453276 0.03211526 0.0006824211
## 122   122 0.03205611 0.2063466 0.02462506 0.0011534222 0.03235874 0.0006822606
## 123   123 0.03205325 0.2064445 0.02462534 0.0011473596 0.03202229 0.0006794365
## 124   124 0.03205213 0.2065006 0.02462893 0.0011440842 0.03184795 0.0006762735
## 125   125 0.03204809 0.2066764 0.02462606 0.0011399843 0.03153441 0.0006741584
## 126   126 0.03204615 0.2067729 0.02462117 0.0011390618 0.03141533 0.0006768886
## 127   127 0.03205351 0.2064814 0.02463005 0.0011355616 0.03140888 0.0006751462
## 128   128 0.03206077 0.2061768 0.02463766 0.0011310169 0.03126794 0.0006684354
## 129   129 0.03205975 0.2062339 0.02463285 0.0011337315 0.03130693 0.0006650687
## 130   130 0.03205876 0.2062931 0.02463144 0.0011323441 0.03118920 0.0006613050
## 131   131 0.03206160 0.2061909 0.02463509 0.0011285443 0.03101012 0.0006560125
## 132   132 0.03206107 0.2062135 0.02463441 0.0011264702 0.03112544 0.0006540343
## 133   133 0.03206577 0.2060358 0.02463949 0.0011257133 0.03099645 0.0006527849
## 134   134 0.03207364 0.2057143 0.02464480 0.0011215810 0.03089706 0.0006508847
## 135   135 0.03207002 0.2058688 0.02464425 0.0011213621 0.03096088 0.0006566557
## 136   136 0.03207097 0.2058173 0.02464606 0.0011186111 0.03072206 0.0006521422
## 137   137 0.03206766 0.2059567 0.02464149 0.0011139211 0.03054318 0.0006531895
## 138   138 0.03206930 0.2058871 0.02464457 0.0011127252 0.03039835 0.0006506721
## 139   139 0.03207310 0.2057303 0.02465217 0.0011137675 0.03045836 0.0006482800
## 140   140 0.03207451 0.2056536 0.02465237 0.0011080715 0.03026135 0.0006441299
## 141   141 0.03207719 0.2055463 0.02465513 0.0011095077 0.03026730 0.0006435190
## 142   142 0.03207928 0.2054637 0.02465479 0.0011038163 0.03005955 0.0006413164
## 143   143 0.03208103 0.2053896 0.02465744 0.0011019072 0.02996832 0.0006406203
## 144   144 0.03208111 0.2053790 0.02465736 0.0010973384 0.02961049 0.0006345386
## 145   145 0.03208194 0.2053612 0.02465545 0.0011018680 0.02974609 0.0006388692
## 146   146 0.03208437 0.2052299 0.02465873 0.0010938023 0.02949692 0.0006338057
## 147   147 0.03208406 0.2052562 0.02465668 0.0010935095 0.02956237 0.0006310461
## 148   148 0.03208604 0.2051453 0.02465823 0.0010870176 0.02938592 0.0006249872
## 149   149 0.03208693 0.2051206 0.02465492 0.0010869686 0.02945992 0.0006242159
## 150   150 0.03208646 0.2051545 0.02465684 0.0010906012 0.02968915 0.0006307926
## 151   151 0.03208991 0.2049983 0.02466033 0.0010850405 0.02962800 0.0006262904
## 152   152 0.03208807 0.2050684 0.02465879 0.0010848320 0.02958173 0.0006266656
## 153   153 0.03209198 0.2049048 0.02466160 0.0010865326 0.02967302 0.0006340689
## 154   154 0.03209059 0.2049665 0.02466005 0.0010912670 0.02995850 0.0006370435
## 155   155 0.03209271 0.2048634 0.02466185 0.0010852424 0.02969162 0.0006332863
## 156   156 0.03208815 0.2050888 0.02465902 0.0010924470 0.02997242 0.0006412033
## 157   157 0.03208726 0.2051485 0.02465890 0.0010906533 0.02999603 0.0006367577
## 158   158 0.03208807 0.2051117 0.02466068 0.0010895200 0.02997214 0.0006380193
## 159   159 0.03208713 0.2051510 0.02466076 0.0010950074 0.03011695 0.0006430644
## 160   160 0.03208550 0.2052221 0.02465649 0.0010971070 0.03019039 0.0006400439
## 161   161 0.03208434 0.2052808 0.02465471 0.0010984559 0.03027519 0.0006381159
## 162   162 0.03208650 0.2051937 0.02465717 0.0010976655 0.03020050 0.0006388805
## 163   163 0.03208480 0.2052655 0.02465666 0.0010968614 0.03018698 0.0006381939
## 164   164 0.03208664 0.2051799 0.02465571 0.0010979172 0.03014250 0.0006396657
## 165   165 0.03208534 0.2052296 0.02465249 0.0010947095 0.03006130 0.0006375254
## 166   166 0.03208729 0.2051538 0.02465198 0.0010957634 0.03002880 0.0006368879
## 167   167 0.03208545 0.2052294 0.02464769 0.0010957616 0.03001456 0.0006378650
## 168   168 0.03208686 0.2051560 0.02464786 0.0010961027 0.02993675 0.0006418943
## 169   169 0.03208900 0.2050673 0.02465059 0.0010945804 0.02986548 0.0006445203
## 170   170 0.03209100 0.2049806 0.02465291 0.0010952796 0.02982943 0.0006436645
## 171   171 0.03209308 0.2048980 0.02465493 0.0010934558 0.02983813 0.0006419822
## 172   172 0.03208971 0.2050415 0.02465281 0.0010959212 0.02994237 0.0006448324
## 173   173 0.03208690 0.2051598 0.02464906 0.0010990414 0.03000044 0.0006467680
## 174   174 0.03208574 0.2052060 0.02464758 0.0010960653 0.02988539 0.0006440678
## 175   175 0.03208904 0.2050636 0.02464807 0.0010936824 0.02987995 0.0006432163
## 176   176 0.03209221 0.2049495 0.02465047 0.0010981691 0.03006503 0.0006459725
## 177   177 0.03209210 0.2049623 0.02465121 0.0011015898 0.03018727 0.0006496947
## 178   178 0.03209387 0.2048928 0.02465288 0.0011043859 0.03034912 0.0006483820
## 179   179 0.03209545 0.2048292 0.02465458 0.0011027677 0.03035071 0.0006473268
## 180   180 0.03209864 0.2047138 0.02465788 0.0011012264 0.03034704 0.0006475026
## 181   181 0.03209769 0.2047604 0.02465674 0.0011031500 0.03050982 0.0006471425
## 182   182 0.03210209 0.2045756 0.02466151 0.0011050947 0.03054713 0.0006483915
## 183   183 0.03210369 0.2045122 0.02466073 0.0011021377 0.03041440 0.0006435666
## 184   184 0.03210651 0.2043894 0.02466254 0.0011052458 0.03051528 0.0006463747
## 185   185 0.03210607 0.2044233 0.02466032 0.0011044674 0.03045134 0.0006454267
## 186   186 0.03210514 0.2044533 0.02465868 0.0011046857 0.03050533 0.0006450899
## 187   187 0.03210623 0.2044012 0.02465785 0.0011019948 0.03032322 0.0006426781
## 188   188 0.03210596 0.2044166 0.02465905 0.0011021026 0.03025874 0.0006438049
## 189   189 0.03210699 0.2043724 0.02466068 0.0011042746 0.03029489 0.0006440600
## 190   190 0.03210737 0.2043579 0.02466382 0.0011020023 0.03023518 0.0006424142
## 191   191 0.03210572 0.2044321 0.02466306 0.0011056422 0.03038783 0.0006429226
## 192   192 0.03210889 0.2042929 0.02466381 0.0011051824 0.03039140 0.0006419025
## 193   193 0.03210620 0.2044115 0.02466275 0.0011069860 0.03042055 0.0006446823
## 194   194 0.03210639 0.2044071 0.02466310 0.0011043911 0.03039051 0.0006425709
## 195   195 0.03210837 0.2043225 0.02466366 0.0011032147 0.03033253 0.0006405611
## 196   196 0.03210908 0.2042832 0.02466350 0.0011021880 0.03026979 0.0006390210
## 197   197 0.03211204 0.2041602 0.02466543 0.0011028568 0.03025483 0.0006389382
## 198   198 0.03211177 0.2041733 0.02466593 0.0011045549 0.03031012 0.0006391914
## 199   199 0.03211207 0.2041501 0.02466657 0.0011028756 0.03021979 0.0006397886
## 200   200 0.03211105 0.2041910 0.02466688 0.0011032678 0.03021656 0.0006398686
## 201   201 0.03211192 0.2041520 0.02466683 0.0011034949 0.03022254 0.0006383202
## 202   202 0.03211073 0.2042074 0.02466744 0.0011031915 0.03020890 0.0006384131
## 203   203 0.03211102 0.2041871 0.02466820 0.0011034352 0.03016556 0.0006388333
## 204   204 0.03210934 0.2042607 0.02466730 0.0011038130 0.03020391 0.0006372284
## 205   205 0.03210879 0.2042884 0.02466635 0.0011043812 0.03021556 0.0006385534
## 206   206 0.03210963 0.2042546 0.02466720 0.0011042217 0.03017080 0.0006389351
## 207   207 0.03210741 0.2043499 0.02466485 0.0011037773 0.03018185 0.0006391489
## 208   208 0.03210681 0.2043787 0.02466397 0.0011051015 0.03023139 0.0006404778
## 209   209 0.03210924 0.2042686 0.02466627 0.0011054587 0.03023173 0.0006415652
## 210   210 0.03210940 0.2042757 0.02466676 0.0011074000 0.03032859 0.0006436772
## 211   211 0.03210883 0.2042997 0.02466565 0.0011078063 0.03033795 0.0006444732
## 212   212 0.03210966 0.2042697 0.02466680 0.0011076091 0.03033941 0.0006453087
## 213   213 0.03210924 0.2042822 0.02466630 0.0011072704 0.03033843 0.0006456218
## 214   214 0.03210866 0.2043080 0.02466545 0.0011073205 0.03030701 0.0006458381
## 215   215 0.03210968 0.2042603 0.02466500 0.0011077043 0.03029705 0.0006460638
## 216   216 0.03211078 0.2042151 0.02466665 0.0011067384 0.03026335 0.0006449334
## 217   217 0.03211109 0.2042072 0.02466717 0.0011092032 0.03036328 0.0006470520
## 218   218 0.03211111 0.2042070 0.02466656 0.0011089075 0.03033337 0.0006467482
## 219   219 0.03211051 0.2042295 0.02466618 0.0011083204 0.03031500 0.0006466827
## 220   220 0.03211076 0.2042154 0.02466654 0.0011077153 0.03028606 0.0006450397
## 221   221 0.03211001 0.2042474 0.02466652 0.0011080025 0.03032290 0.0006449498
## 222   222 0.03211061 0.2042185 0.02466710 0.0011079167 0.03033603 0.0006443065
## 223   223 0.03210934 0.2042676 0.02466648 0.0011072420 0.03030709 0.0006433694
## 224   224 0.03211009 0.2042370 0.02466770 0.0011072195 0.03032682 0.0006431438
## 225   225 0.03210996 0.2042470 0.02466728 0.0011070552 0.03031321 0.0006424427
## 226   226 0.03210957 0.2042651 0.02466715 0.0011077269 0.03033567 0.0006430903
## 227   227 0.03210969 0.2042642 0.02466744 0.0011083432 0.03038858 0.0006432803
## 228   228 0.03210966 0.2042649 0.02466702 0.0011083884 0.03039151 0.0006432022
## 229   229 0.03210980 0.2042614 0.02466716 0.0011091508 0.03042010 0.0006439305
## 230   230 0.03210965 0.2042683 0.02466755 0.0011094279 0.03042562 0.0006438716
## 231   231 0.03210959 0.2042718 0.02466768 0.0011091845 0.03042081 0.0006433408
## 232   232 0.03211034 0.2042399 0.02466868 0.0011088124 0.03040087 0.0006427493
## 233   233 0.03211031 0.2042421 0.02466841 0.0011094963 0.03043202 0.0006434366
## 234   234 0.03211014 0.2042491 0.02466836 0.0011092755 0.03042107 0.0006430091
## 235   235 0.03211022 0.2042449 0.02466844 0.0011091303 0.03041803 0.0006434017
## 236   236 0.03210982 0.2042631 0.02466832 0.0011092561 0.03042793 0.0006437501
## 237   237 0.03210997 0.2042566 0.02466848 0.0011088841 0.03040869 0.0006432413
## 238   238 0.03210991 0.2042592 0.02466824 0.0011087237 0.03040023 0.0006431995
## 239   239 0.03210989 0.2042603 0.02466821 0.0011087423 0.03039855 0.0006432065
## 240   240 0.03210993 0.2042585 0.02466824 0.0011087027 0.03039707 0.0006431900
## [1] "Best Model"
##    nvmax
## 13    13

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  1.972681e+00  1.956867e+00  1.988495e+00
## x4          -4.607173e-05 -6.334938e-05 -2.879408e-05
## x7           1.080812e-02  9.583611e-03  1.203263e-02
## x8           4.784756e-04  1.929724e-04  7.639787e-04
## x9           3.106132e-03  2.470605e-03  3.741659e-03
## x10          1.140634e-03  5.481000e-04  1.733168e-03
## x11          2.032330e+05  6.180148e+04  3.446645e+05
## x16          1.063749e-03  6.532151e-04  1.474284e-03
## x17          1.860411e-03  1.237166e-03  2.483656e-03
## stat14      -7.371136e-04 -1.207823e-03 -2.664039e-04
## stat41      -7.435917e-04 -1.216279e-03 -2.709043e-04
## stat98       3.577314e-03  3.108078e-03  4.046550e-03
## stat110     -3.392311e-03 -3.868431e-03 -2.916191e-03
## x18.sqrt     2.586108e-02  2.405283e-02  2.766932e-02

Test

if (algo.backward.caret == TRUE){
  test.model(model.backward, data.test
             ,method = 'leapBackward',subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.039   2.084   2.097   2.097   2.109   2.145 
## [1] "leapBackward  Test MSE: 0.00104530808325543"

Stepwise Selection with CV

Train

if (algo.stepwise.caret == TRUE){
  set.seed(1)
  returned = train.caret.glmselect(formula = formula
                                   ,data =  data.train
                                   ,method = "leapSeq"
                                   ,feature.names = feature.names)
  model.stepwise = returned$model
  id = returned$id
}
## Aggregating results
## Selecting tuning parameters
## Fitting nvmax = 11 on full training set
## [1] "All models results"
##     nvmax       RMSE  Rsquared        MAE       RMSESD RsquaredSD        MAESD
## 1       1 0.03389262 0.1107374 0.02638098 0.0006522431 0.01843699 0.0004657941
## 2       2 0.03307022 0.1535661 0.02570143 0.0007777253 0.02125833 0.0006093741
## 3       3 0.03251205 0.1824264 0.02515777 0.0007730663 0.02790904 0.0005946623
## 4       4 0.03203058 0.2063268 0.02453996 0.0007618678 0.02742323 0.0006125272
## 5       5 0.03175409 0.2198443 0.02433833 0.0007521550 0.02365268 0.0005893048
## 6       6 0.03164410 0.2252731 0.02426658 0.0007067374 0.02069752 0.0005718563
## 7       7 0.03167451 0.2237877 0.02430027 0.0007134016 0.02161979 0.0006044690
## 8       8 0.03162055 0.2265125 0.02428486 0.0007295711 0.02287693 0.0005987685
## 9       9 0.03155615 0.2297157 0.02422169 0.0007806923 0.02479613 0.0006297838
## 10     10 0.03155548 0.2298092 0.02421996 0.0007214398 0.02383271 0.0005854693
## 11     11 0.03154196 0.2304901 0.02420858 0.0007129736 0.02369648 0.0005638611
## 12     12 0.03157136 0.2290734 0.02422468 0.0007175387 0.02209994 0.0005662709
## 13     13 0.03157326 0.2289631 0.02424003 0.0006890899 0.02081163 0.0005325302
## 14     14 0.03157408 0.2288482 0.02424375 0.0006872834 0.02012904 0.0005257180
## 15     15 0.03157894 0.2287061 0.02423653 0.0007038994 0.02024370 0.0005411458
## 16     16 0.03190636 0.2117547 0.02452156 0.0013404932 0.06017317 0.0011044116
## 17     17 0.03157406 0.2290131 0.02421312 0.0007224985 0.01997446 0.0005265877
## 18     18 0.03156651 0.2293337 0.02420193 0.0007170687 0.01830741 0.0005263061
## 19     19 0.03154426 0.2304284 0.02418362 0.0007083238 0.01837859 0.0005279866
## 20     20 0.03156336 0.2295238 0.02420827 0.0007248759 0.01924171 0.0005474819
## 21     21 0.03159715 0.2279797 0.02422729 0.0007282979 0.02017570 0.0005567951
## 22     22 0.03196738 0.2092553 0.02453578 0.0015485656 0.06013615 0.0012713111
## 23     23 0.03158918 0.2284546 0.02422965 0.0007397660 0.02140798 0.0005825951
## 24     24 0.03156738 0.2294569 0.02421477 0.0007204914 0.02062076 0.0005591127
## 25     25 0.03158046 0.2288811 0.02424395 0.0007370698 0.01940678 0.0005602539
## 26     26 0.03155988 0.2299272 0.02423487 0.0007381787 0.01977053 0.0005594708
## 27     27 0.03157513 0.2292237 0.02425307 0.0007616370 0.02089499 0.0005961089
## 28     28 0.03157278 0.2293200 0.02425732 0.0007545404 0.02083975 0.0005969809
## 29     29 0.03180893 0.2174105 0.02442729 0.0012809581 0.04991480 0.0009363540
## 30     30 0.03229356 0.1930513 0.02484382 0.0016910689 0.06879877 0.0013931207
## 31     31 0.03193126 0.2113926 0.02452373 0.0013507778 0.06042039 0.0010222070
## 32     32 0.03194276 0.2105584 0.02458709 0.0013168990 0.05826289 0.0010729542
## 33     33 0.03163171 0.2269222 0.02431474 0.0007451232 0.02212524 0.0005848146
## 34     34 0.03165120 0.2260814 0.02433254 0.0007545036 0.02282494 0.0005976554
## 35     35 0.03164835 0.2261771 0.02432278 0.0007779652 0.02291525 0.0006327695
## 36     36 0.03190482 0.2120608 0.02454376 0.0008175801 0.05080420 0.0008048053
## 37     37 0.03223647 0.1951826 0.02486195 0.0014064694 0.07066364 0.0011396934
## 38     38 0.03170411 0.2236700 0.02438520 0.0007675873 0.02116256 0.0006103664
## 39     39 0.03196900 0.2094742 0.02463244 0.0010131088 0.05118741 0.0008450926
## 40     40 0.03196216 0.2104704 0.02457026 0.0009346952 0.04384750 0.0007947740
## 41     41 0.03192549 0.2124119 0.02451896 0.0012361352 0.04679187 0.0009139803
## 42     42 0.03217811 0.2002617 0.02475941 0.0013738197 0.05953275 0.0010045000
## 43     43 0.03205242 0.2061287 0.02462206 0.0013589357 0.05964128 0.0010463279
## 44     44 0.03270386 0.1734083 0.02512338 0.0018333297 0.08130799 0.0013313729
## 45     45 0.03205640 0.2056472 0.02465931 0.0013004827 0.05601458 0.0010619360
## 46     46 0.03221936 0.1985616 0.02479355 0.0013553883 0.05856631 0.0009874377
## 47     47 0.03177494 0.2206460 0.02442074 0.0008064939 0.02222634 0.0006460526
## 48     48 0.03202344 0.2077207 0.02462188 0.0009752004 0.04581255 0.0008351891
## 49     49 0.03296207 0.1595841 0.02538980 0.0017383653 0.07823696 0.0013980231
## 50     50 0.03204341 0.2060375 0.02463894 0.0007839917 0.04815828 0.0007573110
## 51     51 0.03206358 0.2051349 0.02464930 0.0007638784 0.04789335 0.0007449423
## 52     52 0.03184275 0.2178206 0.02447732 0.0007952133 0.02297393 0.0006213095
## 53     53 0.03218564 0.2006009 0.02474453 0.0011034122 0.04444579 0.0008989274
## 54     54 0.03272897 0.1719717 0.02525041 0.0016035335 0.07328474 0.0012624810
## 55     55 0.03190696 0.2150266 0.02450950 0.0007919428 0.02295894 0.0006465792
## 56     56 0.03234443 0.1933654 0.02487484 0.0013027446 0.05620578 0.0009719069
## 57     57 0.03190826 0.2150342 0.02450711 0.0007731364 0.02170786 0.0006391728
## 58     58 0.03270840 0.1728509 0.02522079 0.0011853066 0.06469140 0.0010109080
## 59     59 0.03216460 0.2007868 0.02471767 0.0007355282 0.04601677 0.0007522352
## 60     60 0.03258182 0.1810157 0.02504379 0.0017598213 0.07367551 0.0014331603
## 61     61 0.03217749 0.2002409 0.02472311 0.0007516287 0.04654581 0.0007534026
## 62     62 0.03270112 0.1723941 0.02518733 0.0012381587 0.07342228 0.0010739223
## 63     63 0.03303636 0.1566573 0.02536752 0.0017599415 0.07813027 0.0014057263
## 64     64 0.03198178 0.2118771 0.02456126 0.0007936143 0.02250020 0.0006583984
## 65     65 0.03227865 0.1966534 0.02481696 0.0014181398 0.05569133 0.0011230864
## 66     66 0.03225265 0.1977175 0.02477948 0.0012661378 0.05455858 0.0009811456
## 67     67 0.03228552 0.1963687 0.02482691 0.0014201821 0.05564907 0.0011313731
## 68     68 0.03239186 0.1889456 0.02486805 0.0011488409 0.05940391 0.0009812912
## 69     69 0.03292106 0.1641962 0.02532547 0.0018663962 0.07439338 0.0015179714
## 70     70 0.03228190 0.1965133 0.02482179 0.0014066723 0.05535723 0.0011032126
## 71     71 0.03242249 0.1869698 0.02498789 0.0008577006 0.05900046 0.0008312052
## 72     72 0.03263117 0.1791488 0.02509048 0.0018208783 0.07199665 0.0014550252
## 73     73 0.03226859 0.1970791 0.02479467 0.0012548538 0.05441653 0.0009492339
## 74     74 0.03254785 0.1819450 0.02502171 0.0009745416 0.05587756 0.0009345276
## 75     75 0.03264442 0.1782365 0.02509928 0.0017374921 0.07186230 0.0014153600
## 76     76 0.03253261 0.1825747 0.02503149 0.0013337934 0.06604520 0.0011417823
## 77     77 0.03227804 0.1966769 0.02480360 0.0012537172 0.05433758 0.0009701228
## 78     78 0.03286111 0.1664006 0.02529508 0.0014125608 0.06860469 0.0011823598
## 79     79 0.03232460 0.1948419 0.02484955 0.0010716578 0.04348243 0.0008891385
## 80     80 0.03299645 0.1604910 0.02543018 0.0016574642 0.07354738 0.0012389834
## 81     81 0.03203145 0.2098580 0.02462138 0.0007695140 0.02243368 0.0006214841
## 82     82 0.03224029 0.1980646 0.02484341 0.0009336145 0.04602808 0.0007666668
## 83     83 0.03223523 0.1989800 0.02476874 0.0011971892 0.04715206 0.0009132621
## 84     84 0.03277294 0.1728990 0.02521700 0.0013303382 0.05919834 0.0009641456
## 85     85 0.03281866 0.1710558 0.02523939 0.0016711046 0.06877583 0.0012488075
## 86     86 0.03249750 0.1845183 0.02503379 0.0012801992 0.06427935 0.0010009530
## 87     87 0.03280002 0.1714870 0.02524755 0.0015214516 0.06847507 0.0011245466
## 88     88 0.03233594 0.1943903 0.02487820 0.0013793063 0.05447681 0.0010849318
## 89     89 0.03222450 0.1995041 0.02475916 0.0012002133 0.04772528 0.0009190266
## 90     90 0.03254352 0.1826926 0.02510358 0.0014340406 0.06510248 0.0011255931
## 91     91 0.03238513 0.1924834 0.02489125 0.0014818651 0.05697926 0.0011954186
## 92     92 0.03224006 0.1981625 0.02485293 0.0009259335 0.04623628 0.0007592645
## 93     93 0.03204973 0.2091133 0.02464788 0.0007630802 0.02266234 0.0006141097
## 94     94 0.03257437 0.1809023 0.02509379 0.0013011009 0.06567833 0.0011195969
## 95     95 0.03206123 0.2086692 0.02466295 0.0007724915 0.02299611 0.0006223798
## 96     96 0.03331895 0.1422237 0.02571213 0.0019243695 0.08460243 0.0015018316
## 97     97 0.03261531 0.1797927 0.02509662 0.0011951369 0.05910872 0.0010101957
## 98     98 0.03281894 0.1708758 0.02529712 0.0015227201 0.06837044 0.0011294593
## 99     99 0.03259488 0.1807404 0.02512520 0.0015255755 0.06668401 0.0012239505
## 100   100 0.03250003 0.1872431 0.02501616 0.0012202174 0.05246380 0.0008259194
## 101   101 0.03238220 0.1924862 0.02491359 0.0011120299 0.04670515 0.0009208317
## 102   102 0.03207302 0.2083361 0.02467570 0.0007658074 0.02330937 0.0006177188
## 103   103 0.03251042 0.1868173 0.02502162 0.0012198557 0.05247733 0.0008104407
## 104   104 0.03235978 0.1936575 0.02491642 0.0013618990 0.05396057 0.0010680689
## 105   105 0.03239992 0.1921270 0.02492509 0.0014720671 0.05693844 0.0011912118
## 106   106 0.03208365 0.2079157 0.02468668 0.0007690143 0.02360825 0.0006302179
## 107   107 0.03272777 0.1748933 0.02519339 0.0012914664 0.06361111 0.0009158302
## 108   108 0.03256879 0.1823655 0.02508556 0.0016432724 0.06728165 0.0012813883
## 109   109 0.03208839 0.2077815 0.02468660 0.0007638979 0.02321952 0.0006291254
## 110   110 0.03275143 0.1742155 0.02524319 0.0014720328 0.06578984 0.0012417361
## 111   111 0.03210022 0.2072567 0.02469912 0.0007532593 0.02329091 0.0006201568
## 112   112 0.03234495 0.1934208 0.02488906 0.0007455030 0.04916116 0.0007782915
## 113   113 0.03282220 0.1682226 0.02531206 0.0013679681 0.07386064 0.0011005833
## 114   114 0.03278215 0.1729357 0.02525850 0.0014818603 0.06615637 0.0012516033
## 115   115 0.03282348 0.1687880 0.02525418 0.0016128246 0.07653423 0.0012973311
## 116   116 0.03297523 0.1617093 0.02537799 0.0015938689 0.07732054 0.0013721641
## 117   117 0.03266643 0.1781347 0.02517356 0.0016298579 0.07095424 0.0013324249
## 118   118 0.03240974 0.1916885 0.02495676 0.0013292524 0.05280513 0.0010497223
## 119   119 0.03261332 0.1802813 0.02511146 0.0014298646 0.06552485 0.0011537237
## 120   120 0.03229448 0.1967973 0.02490154 0.0008657819 0.04189151 0.0007341068
## 121   121 0.03227531 0.1978130 0.02482283 0.0008834765 0.04170865 0.0007578720
## 122   122 0.03234726 0.1942724 0.02489595 0.0007634031 0.04729228 0.0007926131
## 123   123 0.03239991 0.1926545 0.02498390 0.0009662668 0.03903177 0.0009746519
## 124   124 0.03263165 0.1805597 0.02509780 0.0010689266 0.04606071 0.0007422657
## 125   125 0.03284427 0.1695182 0.02529752 0.0015538037 0.06662734 0.0013126628
## 126   126 0.03243223 0.1911956 0.02493885 0.0008845017 0.03451720 0.0005518075
## 127   127 0.03213794 0.2059163 0.02471882 0.0007563696 0.02446492 0.0006160069
## 128   128 0.03240114 0.1923913 0.02497495 0.0011193227 0.04702881 0.0009758222
## 129   129 0.03214064 0.2057934 0.02472525 0.0007576194 0.02471343 0.0006083244
## 130   130 0.03234329 0.1953112 0.02489857 0.0012666434 0.05064304 0.0010301210
## 131   131 0.03215423 0.2053147 0.02473458 0.0007625378 0.02500559 0.0006138360
## 132   132 0.03236292 0.1947478 0.02494170 0.0008428065 0.03130747 0.0007882416
## 133   133 0.03246571 0.1880965 0.02499406 0.0008095321 0.05050795 0.0007927231
## 134   134 0.03234827 0.1951122 0.02491964 0.0009961354 0.04021427 0.0008235674
## 135   135 0.03214651 0.2056296 0.02473753 0.0007550915 0.02504273 0.0006088901
## 136   136 0.03250483 0.1867763 0.02501774 0.0012122568 0.05062705 0.0009244021
## 137   137 0.03272336 0.1755202 0.02518225 0.0008100232 0.04594793 0.0006543020
## 138   138 0.03286597 0.1680172 0.02534444 0.0013508950 0.05540468 0.0011132573
## 139   139 0.03213980 0.2059246 0.02472159 0.0007383313 0.02408760 0.0006088668
## 140   140 0.03279876 0.1723497 0.02522430 0.0010439499 0.04878411 0.0008361220
## 141   141 0.03228084 0.1980190 0.02485333 0.0008434060 0.03881870 0.0006539497
## 142   142 0.03231800 0.1962194 0.02488409 0.0007288258 0.04264854 0.0007560841
## 143   143 0.03231249 0.1964752 0.02487780 0.0007265343 0.04253209 0.0007564128
## 144   144 0.03250434 0.1870728 0.02498109 0.0012564309 0.05271093 0.0010187274
## 145   145 0.03246186 0.1882370 0.02501522 0.0007800813 0.05030780 0.0007485211
## 146   146 0.03228614 0.1980761 0.02483963 0.0009421280 0.03782737 0.0007582706
## 147   147 0.03212439 0.2066283 0.02471155 0.0007572386 0.02475593 0.0006200199
## 148   148 0.03279512 0.1727491 0.02526594 0.0013497432 0.05228611 0.0011412622
## 149   149 0.03212233 0.2067047 0.02471583 0.0007487306 0.02448356 0.0006148482
## 150   150 0.03238229 0.1939010 0.02490365 0.0007968104 0.02864210 0.0004684423
## 151   151 0.03275673 0.1744202 0.02523422 0.0012607549 0.05258216 0.0010422328
## 152   152 0.03236674 0.1945862 0.02488977 0.0007960599 0.02826401 0.0004687335
## 153   153 0.03261601 0.1823983 0.02506313 0.0011971036 0.04331338 0.0008337506
## 154   154 0.03273379 0.1753356 0.02524099 0.0011649759 0.04947155 0.0010052775
## 155   155 0.03212747 0.2065048 0.02471961 0.0007451542 0.02409970 0.0006054221
## 156   156 0.03230235 0.1973791 0.02485826 0.0009664567 0.03912220 0.0007777554
## 157   157 0.03227549 0.1983409 0.02485214 0.0008436627 0.03881141 0.0006468913
## 158   158 0.03285877 0.1682981 0.02535308 0.0012703161 0.05658262 0.0011175112
## 159   159 0.03244966 0.1894975 0.02496935 0.0010867507 0.04703700 0.0008743687
## 160   160 0.03230124 0.1974384 0.02484859 0.0009581915 0.03913552 0.0007714527
## 161   161 0.03258283 0.1833713 0.02507221 0.0006930945 0.04231538 0.0005990178
## 162   162 0.03237783 0.1942782 0.02488930 0.0012257032 0.04492003 0.0009677082
## 163   163 0.03229270 0.1977343 0.02483510 0.0008638382 0.04052628 0.0007397562
## 164   164 0.03213083 0.2063660 0.02471082 0.0007154935 0.02319874 0.0005939863
## 165   165 0.03229131 0.1977951 0.02483823 0.0008661526 0.04053791 0.0007432873
## 166   166 0.03212787 0.2064684 0.02471457 0.0007220392 0.02311780 0.0005953232
## 167   167 0.03236467 0.1946424 0.02492524 0.0008838034 0.03421602 0.0008231460
## 168   168 0.03254317 0.1855407 0.02502903 0.0012894299 0.05395799 0.0010496752
## 169   169 0.03238119 0.1939853 0.02490309 0.0007779993 0.02780307 0.0004557753
## 170   170 0.03213389 0.2062221 0.02471898 0.0007266104 0.02328550 0.0005914244
## 171   171 0.03230372 0.1974000 0.02485292 0.0010141597 0.03631847 0.0007877086
## 172   172 0.03213171 0.2063021 0.02472116 0.0007275140 0.02313804 0.0005910438
## 173   173 0.03212436 0.2066456 0.02471346 0.0007305029 0.02332999 0.0005987271
## 174   174 0.03236115 0.1948452 0.02492836 0.0008823393 0.03377038 0.0008177006
## 175   175 0.03212588 0.2065730 0.02471868 0.0007287127 0.02345189 0.0005973729
## 176   176 0.03230320 0.1973545 0.02486245 0.0009675710 0.03941914 0.0007817729
## 177   177 0.03233930 0.1955725 0.02490764 0.0007389759 0.04430253 0.0007780034
## 178   178 0.03212460 0.2066435 0.02472257 0.0007308067 0.02353279 0.0005980228
## 179   179 0.03211966 0.2068668 0.02471943 0.0007344497 0.02365057 0.0005986615
## 180   180 0.03212345 0.2067046 0.02472209 0.0007317950 0.02354773 0.0005965431
## 181   181 0.03227059 0.1986670 0.02484626 0.0008333514 0.03838499 0.0006473446
## 182   182 0.03212549 0.2066035 0.02471853 0.0007340489 0.02363249 0.0006065225
## 183   183 0.03212182 0.2067590 0.02471665 0.0007372081 0.02365035 0.0006074755
## 184   184 0.03228411 0.1983258 0.02487134 0.0011314842 0.04350718 0.0009678256
## 185   185 0.03231140 0.1968470 0.02489048 0.0009759886 0.03877548 0.0008057550
## 186   186 0.03261688 0.1826699 0.02507366 0.0012117376 0.04338476 0.0008449042
## 187   187 0.03212354 0.2067003 0.02472043 0.0007349509 0.02385099 0.0006079329
## 188   188 0.03271750 0.1760710 0.02524236 0.0010796804 0.04977342 0.0009309751
## 189   189 0.03212454 0.2066545 0.02472041 0.0007402202 0.02392888 0.0006107112
## 190   190 0.03237661 0.1941804 0.02494091 0.0009262456 0.03585763 0.0008543306
## 191   191 0.03227252 0.1986018 0.02484862 0.0008484927 0.03888484 0.0006611327
## 192   192 0.03212279 0.2067393 0.02471832 0.0007429165 0.02397257 0.0006142717
## 193   193 0.03212049 0.2068400 0.02471873 0.0007441163 0.02400649 0.0006163243
## 194   194 0.03236386 0.1948409 0.02489572 0.0007700140 0.02684707 0.0004516523
## 195   195 0.03254624 0.1853773 0.02507973 0.0009996753 0.04662736 0.0009464343
## 196   196 0.03211866 0.2069436 0.02471517 0.0007440972 0.02388126 0.0006151766
## 197   197 0.03212030 0.2068781 0.02471536 0.0007446741 0.02398357 0.0006153619
## 198   198 0.03212070 0.2068622 0.02471730 0.0007420512 0.02392750 0.0006132222
## 199   199 0.03254132 0.1855151 0.02507159 0.0009709762 0.05386286 0.0009242990
## 200   200 0.03247202 0.1883849 0.02502590 0.0010534061 0.04898955 0.0008330219
## 201   201 0.03231617 0.1969197 0.02486917 0.0010106702 0.04147785 0.0008086191
## 202   202 0.03212045 0.2068743 0.02471308 0.0007398835 0.02397711 0.0006106790
## 203   203 0.03229894 0.1976979 0.02485596 0.0009101831 0.04240294 0.0007931080
## 204   204 0.03211407 0.2071354 0.02470865 0.0007444526 0.02399015 0.0006109085
## 205   205 0.03229148 0.1981582 0.02487531 0.0011880177 0.04578411 0.0010150249
## 206   206 0.03211671 0.2070267 0.02471071 0.0007447954 0.02393297 0.0006106550
## 207   207 0.03211887 0.2069371 0.02471254 0.0007427624 0.02402469 0.0006106533
## 208   208 0.03261720 0.1824213 0.02511071 0.0008691547 0.03283763 0.0006916846
## 209   209 0.03212021 0.2068822 0.02471515 0.0007409750 0.02398888 0.0006081807
## 210   210 0.03212055 0.2068709 0.02471569 0.0007407612 0.02400551 0.0006075216
## 211   211 0.03212133 0.2068386 0.02471700 0.0007418947 0.02403101 0.0006077557
## 212   212 0.03231318 0.1968899 0.02488291 0.0009903616 0.03938384 0.0008081808
## 213   213 0.03212086 0.2068608 0.02471562 0.0007435165 0.02402960 0.0006077945
## 214   214 0.03212133 0.2068388 0.02471609 0.0007441390 0.02405444 0.0006079927
## 215   215 0.03268063 0.1801816 0.02516311 0.0014002530 0.05227637 0.0011752653
## 216   216 0.03230009 0.1976550 0.02485208 0.0010396276 0.03742624 0.0008042829
## 217   217 0.03212368 0.2067302 0.02471694 0.0007412252 0.02386297 0.0006050774
## 218   218 0.03212379 0.2067215 0.02471769 0.0007408228 0.02386854 0.0006057193
## 219   219 0.03212279 0.2067716 0.02471636 0.0007421437 0.02393657 0.0006064402
## 220   220 0.03239850 0.1934103 0.02495784 0.0009593283 0.03735062 0.0008821692
## 221   221 0.03212575 0.2066443 0.02471894 0.0007413844 0.02395601 0.0006063551
## 222   222 0.03255048 0.1851097 0.02508822 0.0009753751 0.05303685 0.0009280365
## 223   223 0.03233454 0.1962169 0.02489273 0.0010401791 0.04259350 0.0008456203
## 224   224 0.03258596 0.1843012 0.02507020 0.0014725559 0.05340019 0.0011604255
## 225   225 0.03212529 0.2066575 0.02471795 0.0007423461 0.02394726 0.0006068688
## 226   226 0.03233024 0.1963998 0.02488963 0.0010301119 0.04208109 0.0008380879
## 227   227 0.03230123 0.1975935 0.02487127 0.0008882659 0.04111235 0.0006762506
## 228   228 0.03237485 0.1943663 0.02489854 0.0007866438 0.02791711 0.0004657703
## 229   229 0.03212581 0.2066407 0.02471867 0.0007424296 0.02396027 0.0006065668
## 230   230 0.03233011 0.1962688 0.02489453 0.0010210913 0.04062813 0.0008329102
## 231   231 0.03232256 0.1967568 0.02487605 0.0009375355 0.04385260 0.0008125199
## 232   232 0.03254075 0.1857811 0.02502507 0.0010065551 0.03596151 0.0006477208
## 233   233 0.03232399 0.1968593 0.02490264 0.0012425605 0.04796890 0.0010621841
## 234   234 0.03212530 0.2066589 0.02471809 0.0007407540 0.02389895 0.0006053082
## 235   235 0.03242393 0.1928416 0.02495409 0.0013546307 0.04903128 0.0011069195
## 236   236 0.03246543 0.1890727 0.02499402 0.0010984918 0.04737600 0.0008141455
## 237   237 0.03271168 0.1773100 0.02522445 0.0015345402 0.05998383 0.0012653944
## 238   238 0.03271903 0.1767379 0.02522470 0.0012565159 0.05810254 0.0009799755
## 239   239 0.03336489 0.1468546 0.02575478 0.0013820283 0.06609633 0.0013056925
## 240   240 0.03212568 0.2066400 0.02471817 0.0007406867 0.02389293 0.0006054287
## [1] "Best Model"
##    nvmax
## 11    11

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients of final model:"
##                  Estimate         2.5 %        97.5 %
## (Intercept)  1.9943108240  1.987270e+00  2.001352e+00
## x4          -0.0000563195 -7.366668e-05 -3.897232e-05
## x7           0.0111695138  9.941573e-03  1.239745e-02
## x9           0.0032703436  2.633200e-03  3.907487e-03
## x10          0.0010757959  4.818071e-04  1.669785e-03
## x16          0.0009034675  4.932559e-04  1.313679e-03
## x17          0.0013944496  7.693954e-04  2.019504e-03
## x21          0.0001290356  4.794220e-05  2.101289e-04
## stat14      -0.0009237427 -1.396307e-03 -4.511779e-04
## stat98       0.0035049260  3.034519e-03  3.975332e-03
## stat110     -0.0031947718 -3.668557e-03 -2.720987e-03
## x18.sqrt     0.0265348474  2.470918e-02  2.836052e-02

Test

if (algo.stepwise.caret == TRUE){
  test.model(model.stepwise, data.test
             ,method = 'leapSeq',subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,id = id
             ,draw.limits = TRUE, transformation = t)
  
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.037   2.084   2.097   2.097   2.109   2.144 
## [1] "leapSeq  Test MSE: 0.00103220592496655"

LASSO with CV

Train

if (algo.LASSO.caret == TRUE){
  set.seed(1)
  tune.grid= expand.grid(alpha = 1,lambda = 10^seq(from=-4,to=-2,length=100))
  returned = train.caret.glmselect(formula = formula
                                   ,data =  data.train
                                   ,method = "glmnet"
                                   ,subopt = 'LASSO'
                                   ,tune.grid = tune.grid
                                   ,feature.names = feature.names)
  model.LASSO.caret = returned$model
}
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 1, lambda = 0.000586 on full training set
## glmnet 
## 
## 5584 samples
##  240 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   lambda        RMSE        Rsquared   MAE       
##   0.0001000000  0.03186254  0.2131706  0.02448446
##   0.0001047616  0.03185287  0.2135421  0.02447754
##   0.0001097499  0.03184290  0.2139273  0.02447050
##   0.0001149757  0.03183278  0.2143197  0.02446345
##   0.0001204504  0.03182251  0.2147196  0.02445629
##   0.0001261857  0.03181203  0.2151295  0.02444900
##   0.0001321941  0.03180124  0.2155546  0.02444145
##   0.0001384886  0.03179022  0.2159929  0.02443365
##   0.0001450829  0.03177900  0.2164424  0.02442555
##   0.0001519911  0.03176751  0.2169073  0.02441716
##   0.0001592283  0.03175573  0.2173883  0.02440850
##   0.0001668101  0.03174379  0.2178795  0.02439969
##   0.0001747528  0.03173156  0.2183867  0.02439073
##   0.0001830738  0.03171937  0.2188955  0.02438193
##   0.0001917910  0.03170710  0.2194114  0.02437324
##   0.0002009233  0.03169455  0.2199451  0.02436448
##   0.0002104904  0.03168172  0.2204961  0.02435570
##   0.0002205131  0.03166879  0.2210587  0.02434699
##   0.0002310130  0.03165573  0.2216326  0.02433833
##   0.0002420128  0.03164269  0.2222132  0.02432989
##   0.0002535364  0.03162937  0.2228143  0.02432140
##   0.0002656088  0.03161609  0.2234218  0.02431286
##   0.0002782559  0.03160254  0.2240509  0.02430405
##   0.0002915053  0.03158888  0.2246960  0.02429503
##   0.0003053856  0.03157553  0.2253375  0.02428615
##   0.0003199267  0.03156213  0.2259943  0.02427741
##   0.0003351603  0.03154912  0.2266462  0.02426905
##   0.0003511192  0.03153688  0.2272725  0.02426095
##   0.0003678380  0.03152472  0.2279069  0.02425278
##   0.0003853529  0.03151364  0.2285015  0.02424530
##   0.0004037017  0.03150354  0.2290623  0.02423817
##   0.0004229243  0.03149441  0.2295903  0.02423105
##   0.0004430621  0.03148657  0.2300705  0.02422479
##   0.0004641589  0.03148012  0.2304946  0.02421964
##   0.0004862602  0.03147502  0.2308656  0.02421592
##   0.0005094138  0.03147118  0.2311882  0.02421398
##   0.0005336699  0.03146793  0.2314971  0.02421329
##   0.0005590810  0.03146563  0.2317725  0.02421364
##   0.0005857021  0.03146485  0.2319857  0.02421510
##   0.0006135907  0.03146584  0.2321220  0.02421818
##   0.0006428073  0.03146918  0.2321489  0.02422348
##   0.0006734151  0.03147475  0.2320748  0.02423001
##   0.0007054802  0.03148285  0.2318850  0.02423797
##   0.0007390722  0.03149216  0.2316487  0.02424663
##   0.0007742637  0.03150352  0.2313229  0.02425721
##   0.0008111308  0.03151561  0.2309812  0.02426887
##   0.0008497534  0.03152883  0.2306043  0.02428169
##   0.0008902151  0.03154318  0.2301866  0.02429514
##   0.0009326033  0.03155801  0.2297646  0.02430946
##   0.0009770100  0.03157373  0.2293186  0.02432425
##   0.0010235310  0.03159029  0.2288493  0.02433989
##   0.0010722672  0.03160905  0.2282867  0.02435774
##   0.0011233240  0.03162880  0.2276984  0.02437730
##   0.0011768120  0.03164967  0.2270796  0.02439749
##   0.0012328467  0.03167075  0.2264811  0.02441810
##   0.0012915497  0.03169277  0.2258702  0.02443960
##   0.0013530478  0.03171508  0.2252849  0.02446178
##   0.0014174742  0.03173903  0.2246526  0.02448601
##   0.0014849683  0.03176478  0.2239701  0.02451191
##   0.0015556761  0.03179265  0.2232226  0.02454013
##   0.0016297508  0.03182310  0.2223846  0.02457066
##   0.0017073526  0.03185634  0.2214465  0.02460369
##   0.0017886495  0.03189252  0.2203999  0.02463923
##   0.0018738174  0.03193174  0.2192377  0.02467710
##   0.0019630407  0.03197388  0.2179690  0.02471746
##   0.0020565123  0.03201695  0.2167355  0.02475935
##   0.0021544347  0.03205945  0.2156429  0.02480038
##   0.0022570197  0.03210426  0.2145055  0.02484348
##   0.0023644894  0.03215061  0.2133777  0.02488785
##   0.0024770764  0.03219908  0.2122371  0.02493429
##   0.0025950242  0.03224814  0.2111939  0.02498085
##   0.0027185882  0.03230077  0.2100683  0.02502947
##   0.0028480359  0.03235759  0.2088192  0.02508109
##   0.0029836472  0.03241976  0.2073653  0.02513682
##   0.0031257158  0.03248784  0.2056592  0.02519677
##   0.0032745492  0.03256239  0.2036489  0.02526150
##   0.0034304693  0.03264401  0.2012711  0.02533115
##   0.0035938137  0.03273121  0.1986201  0.02540430
##   0.0037649358  0.03282229  0.1958230  0.02548042
##   0.0039442061  0.03291521  0.1930845  0.02555791
##   0.0041320124  0.03300820  0.1905777  0.02563477
##   0.0043287613  0.03310881  0.1876581  0.02571685
##   0.0045348785  0.03321832  0.1841423  0.02580479
##   0.0047508102  0.03333808  0.1798125  0.02589992
##   0.0049770236  0.03346901  0.1744487  0.02600325
##   0.0052140083  0.03361185  0.1677970  0.02611460
##   0.0054622772  0.03376758  0.1595180  0.02623513
##   0.0057223677  0.03392750  0.1503030  0.02635743
##   0.0059948425  0.03408432  0.1407341  0.02647539
##   0.0062802914  0.03422971  0.1321208  0.02658008
##   0.0065793322  0.03435491  0.1254670  0.02666645
##   0.0068926121  0.03447684  0.1190162  0.02675022
##   0.0072208090  0.03459142  0.1131615  0.02682868
##   0.0075646333  0.03469121  0.1096721  0.02689845
##   0.0079248290  0.03477185  0.1096415  0.02695410
##   0.0083021757  0.03486007  0.1096104  0.02701474
##   0.0086974900  0.03495639  0.1096104  0.02708170
##   0.0091116276  0.03506180  0.1096104  0.02715569
##   0.0095454846  0.03517711  0.1096104  0.02723706
##   0.0100000000  0.03530321  0.1096104  0.02732657
## 
## Tuning parameter 'alpha' was held constant at a value of 1
## RMSE was used to select the optimal model using the smallest value.
## The final values used for the model were alpha = 1 and lambda = 0.0005857021.

##    alpha       lambda
## 39     1 0.0005857021
##     alpha       lambda       RMSE  Rsquared        MAE       RMSESD RsquaredSD        MAESD
## 1       1 0.0001000000 0.03186254 0.2131706 0.02448446 0.0010816415 0.03059149 0.0006225145
## 2       1 0.0001047616 0.03185287 0.2135421 0.02447754 0.0010806578 0.03061109 0.0006218163
## 3       1 0.0001097499 0.03184290 0.2139273 0.02447050 0.0010796780 0.03063311 0.0006210306
## 4       1 0.0001149757 0.03183278 0.2143197 0.02446345 0.0010786347 0.03065167 0.0006199662
## 5       1 0.0001204504 0.03182251 0.2147196 0.02445629 0.0010776461 0.03067391 0.0006186734
## 6       1 0.0001261857 0.03181203 0.2151295 0.02444900 0.0010765133 0.03068742 0.0006171169
## 7       1 0.0001321941 0.03180124 0.2155546 0.02444145 0.0010753657 0.03069829 0.0006153765
## 8       1 0.0001384886 0.03179022 0.2159929 0.02443365 0.0010742877 0.03071623 0.0006136614
## 9       1 0.0001450829 0.03177900 0.2164424 0.02442555 0.0010733520 0.03074157 0.0006120619
## 10      1 0.0001519911 0.03176751 0.2169073 0.02441716 0.0010724554 0.03077286 0.0006105353
## 11      1 0.0001592283 0.03175573 0.2173883 0.02440850 0.0010715325 0.03080546 0.0006088284
## 12      1 0.0001668101 0.03174379 0.2178795 0.02439969 0.0010706120 0.03083976 0.0006070682
## 13      1 0.0001747528 0.03173156 0.2183867 0.02439073 0.0010698246 0.03087357 0.0006055616
## 14      1 0.0001830738 0.03171937 0.2188955 0.02438193 0.0010688719 0.03089765 0.0006041654
## 15      1 0.0001917910 0.03170710 0.2194114 0.02437324 0.0010678191 0.03091850 0.0006029665
## 16      1 0.0002009233 0.03169455 0.2199451 0.02436448 0.0010668853 0.03095159 0.0006021066
## 17      1 0.0002104904 0.03168172 0.2204961 0.02435570 0.0010658951 0.03098268 0.0006015381
## 18      1 0.0002205131 0.03166879 0.2210587 0.02434699 0.0010648152 0.03101650 0.0006008219
## 19      1 0.0002310130 0.03165573 0.2216326 0.02433833 0.0010636070 0.03104953 0.0006002490
## 20      1 0.0002420128 0.03164269 0.2222132 0.02432989 0.0010617190 0.03106049 0.0005990607
## 21      1 0.0002535364 0.03162937 0.2228143 0.02432140 0.0010592583 0.03105938 0.0005977307
## 22      1 0.0002656088 0.03161609 0.2234218 0.02431286 0.0010562568 0.03103555 0.0005956924
## 23      1 0.0002782559 0.03160254 0.2240509 0.02430405 0.0010526590 0.03099020 0.0005927604
## 24      1 0.0002915053 0.03158888 0.2246960 0.02429503 0.0010489110 0.03095464 0.0005896721
## 25      1 0.0003053856 0.03157553 0.2253375 0.02428615 0.0010451476 0.03093785 0.0005862107
## 26      1 0.0003199267 0.03156213 0.2259943 0.02427741 0.0010413014 0.03094294 0.0005820946
## 27      1 0.0003351603 0.03154912 0.2266462 0.02426905 0.0010376769 0.03098014 0.0005777432
## 28      1 0.0003511192 0.03153688 0.2272725 0.02426095 0.0010334578 0.03098666 0.0005728602
## 29      1 0.0003678380 0.03152472 0.2279069 0.02425278 0.0010289148 0.03097041 0.0005677343
## 30      1 0.0003853529 0.03151364 0.2285015 0.02424530 0.0010236841 0.03092529 0.0005627201
## 31      1 0.0004037017 0.03150354 0.2290623 0.02423817 0.0010177993 0.03086858 0.0005578103
## 32      1 0.0004229243 0.03149441 0.2295903 0.02423105 0.0010120648 0.03082125 0.0005527572
## 33      1 0.0004430621 0.03148657 0.2300705 0.02422479 0.0010065515 0.03078805 0.0005477771
## 34      1 0.0004641589 0.03148012 0.2304946 0.02421964 0.0010010731 0.03074240 0.0005431649
## 35      1 0.0004862602 0.03147502 0.2308656 0.02421592 0.0009959623 0.03070814 0.0005390395
## 36      1 0.0005094138 0.03147118 0.2311882 0.02421398 0.0009907084 0.03064639 0.0005349833
## 37      1 0.0005336699 0.03146793 0.2314971 0.02421329 0.0009852179 0.03054176 0.0005307681
## 38      1 0.0005590810 0.03146563 0.2317725 0.02421364 0.0009798335 0.03044806 0.0005266810
## 39      1 0.0005857021 0.03146485 0.2319857 0.02421510 0.0009744800 0.03038492 0.0005225261
## 40      1 0.0006135907 0.03146584 0.2321220 0.02421818 0.0009691706 0.03032166 0.0005183409
## 41      1 0.0006428073 0.03146918 0.2321489 0.02422348 0.0009632859 0.03026012 0.0005132814
## 42      1 0.0006734151 0.03147475 0.2320748 0.02423001 0.0009574506 0.03018704 0.0005083421
## 43      1 0.0007054802 0.03148285 0.2318850 0.02423797 0.0009522075 0.03013845 0.0005032098
## 44      1 0.0007390722 0.03149216 0.2316487 0.02424663 0.0009476385 0.03013464 0.0004984777
## 45      1 0.0007742637 0.03150352 0.2313229 0.02425721 0.0009443090 0.03019861 0.0004934302
## 46      1 0.0008111308 0.03151561 0.2309812 0.02426887 0.0009410966 0.03032250 0.0004889598
## 47      1 0.0008497534 0.03152883 0.2306043 0.02428169 0.0009375036 0.03047281 0.0004844285
## 48      1 0.0008902151 0.03154318 0.2301866 0.02429514 0.0009340251 0.03063555 0.0004808780
## 49      1 0.0009326033 0.03155801 0.2297646 0.02430946 0.0009300631 0.03076275 0.0004771733
## 50      1 0.0009770100 0.03157373 0.2293186 0.02432425 0.0009261764 0.03091865 0.0004739790
## 51      1 0.0010235310 0.03159029 0.2288493 0.02433989 0.0009219009 0.03106193 0.0004710222
## 52      1 0.0010722672 0.03160905 0.2282867 0.02435774 0.0009176930 0.03119991 0.0004682422
## 53      1 0.0011233240 0.03162880 0.2276984 0.02437730 0.0009139692 0.03128803 0.0004668880
## 54      1 0.0011768120 0.03164967 0.2270796 0.02439749 0.0009106334 0.03141549 0.0004658644
## 55      1 0.0012328467 0.03167075 0.2264811 0.02441810 0.0009069274 0.03144445 0.0004652042
## 56      1 0.0012915497 0.03169277 0.2258702 0.02443960 0.0009032899 0.03151830 0.0004648124
## 57      1 0.0013530478 0.03171508 0.2252849 0.02446178 0.0008991696 0.03153616 0.0004641038
## 58      1 0.0014174742 0.03173903 0.2246526 0.02448601 0.0008958252 0.03158916 0.0004636912
## 59      1 0.0014849683 0.03176478 0.2239701 0.02451191 0.0008931801 0.03166195 0.0004638868
## 60      1 0.0015556761 0.03179265 0.2232226 0.02454013 0.0008910066 0.03177314 0.0004646398
## 61      1 0.0016297508 0.03182310 0.2223846 0.02457066 0.0008892402 0.03190584 0.0004657213
## 62      1 0.0017073526 0.03185634 0.2214465 0.02460369 0.0008873051 0.03205254 0.0004670256
## 63      1 0.0017886495 0.03189252 0.2203999 0.02463923 0.0008852125 0.03221355 0.0004684890
## 64      1 0.0018738174 0.03193174 0.2192377 0.02467710 0.0008828851 0.03237307 0.0004700008
## 65      1 0.0019630407 0.03197388 0.2179690 0.02471746 0.0008803235 0.03251003 0.0004720877
## 66      1 0.0020565123 0.03201695 0.2167355 0.02475935 0.0008782437 0.03276090 0.0004736465
## 67      1 0.0021544347 0.03205945 0.2156429 0.02480038 0.0008755669 0.03286425 0.0004741269
## 68      1 0.0022570197 0.03210426 0.2145055 0.02484348 0.0008731160 0.03303330 0.0004740046
## 69      1 0.0023644894 0.03215061 0.2133777 0.02488785 0.0008697195 0.03312975 0.0004728314
## 70      1 0.0024770764 0.03219908 0.2122371 0.02493429 0.0008666542 0.03329081 0.0004716820
## 71      1 0.0025950242 0.03224814 0.2111939 0.02498085 0.0008627554 0.03330024 0.0004694517
## 72      1 0.0027185882 0.03230077 0.2100683 0.02502947 0.0008590609 0.03337342 0.0004680067
## 73      1 0.0028480359 0.03235759 0.2088192 0.02508109 0.0008551360 0.03346376 0.0004670723
## 74      1 0.0029836472 0.03241976 0.2073653 0.02513682 0.0008510986 0.03356231 0.0004665059
## 75      1 0.0031257158 0.03248784 0.2056592 0.02519677 0.0008469106 0.03365733 0.0004664300
## 76      1 0.0032745492 0.03256239 0.2036489 0.02526150 0.0008425703 0.03374455 0.0004658111
## 77      1 0.0034304693 0.03264401 0.2012711 0.02533115 0.0008380791 0.03381881 0.0004646748
## 78      1 0.0035938137 0.03273121 0.1986201 0.02540430 0.0008331604 0.03388408 0.0004636922
## 79      1 0.0037649358 0.03282229 0.1958230 0.02548042 0.0008284423 0.03374588 0.0004622902
## 80      1 0.0039442061 0.03291521 0.1930845 0.02555791 0.0008251746 0.03388233 0.0004636622
## 81      1 0.0041320124 0.03300820 0.1905777 0.02563477 0.0008214935 0.03375383 0.0004640136
## 82      1 0.0043287613 0.03310881 0.1876581 0.02571685 0.0008184043 0.03375498 0.0004644003
## 83      1 0.0045348785 0.03321832 0.1841423 0.02580479 0.0008151915 0.03372051 0.0004639725
## 84      1 0.0047508102 0.03333808 0.1798125 0.02589992 0.0008120953 0.03362327 0.0004642206
## 85      1 0.0049770236 0.03346901 0.1744487 0.02600325 0.0008091510 0.03343275 0.0004620281
## 86      1 0.0052140083 0.03361185 0.1677970 0.02611460 0.0008057895 0.03308994 0.0004578725
## 87      1 0.0054622772 0.03376758 0.1595180 0.02623513 0.0008018740 0.03251782 0.0004509641
## 88      1 0.0057223677 0.03392750 0.1503030 0.02635743 0.0007921065 0.03163494 0.0004389589
## 89      1 0.0059948425 0.03408432 0.1407341 0.02647539 0.0007728687 0.02915934 0.0004206739
## 90      1 0.0062802914 0.03422971 0.1321208 0.02658008 0.0007598752 0.02873562 0.0004091920
## 91      1 0.0065793322 0.03435491 0.1254670 0.02666645 0.0007483329 0.02662652 0.0004003342
## 92      1 0.0068926121 0.03447684 0.1190162 0.02675022 0.0007397248 0.02574520 0.0003921189
## 93      1 0.0072208090 0.03459142 0.1131615 0.02682868 0.0007368861 0.02372812 0.0003850593
## 94      1 0.0075646333 0.03469121 0.1096721 0.02689845 0.0007358720 0.02471483 0.0003812929
## 95      1 0.0079248290 0.03477185 0.1096415 0.02695410 0.0007343381 0.02475087 0.0003794832
## 96      1 0.0083021757 0.03486007 0.1096104 0.02701474 0.0007329875 0.02478797 0.0003777373
## 97      1 0.0086974900 0.03495639 0.1096104 0.02708170 0.0007316796 0.02478797 0.0003770035
## 98      1 0.0091116276 0.03506180 0.1096104 0.02715569 0.0007307107 0.02478797 0.0003774184
## 99      1 0.0095454846 0.03517711 0.1096104 0.02723706 0.0007301431 0.02478797 0.0003791633
## 100     1 0.0100000000 0.03530321 0.1096104 0.02732657 0.0007300461 0.02478797 0.0003835752

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##                model.coef
## (Intercept)  1.993238e+00
## x4          -3.322227e-05
## x7           9.914108e-03
## x8           2.584645e-04
## x9           2.633861e-03
## x10          7.245818e-04
## x11          1.061230e+05
## x14         -1.134113e-05
## x16          7.740589e-04
## x17          1.423644e-03
## x21          4.028261e-05
## stat3        1.094643e-04
## stat4       -2.159159e-04
## stat10      -4.625205e-07
## stat13      -3.488169e-04
## stat14      -3.898007e-04
## stat18      -3.075583e-06
## stat22      -1.343960e-04
## stat23       2.165912e-04
## stat24      -1.745584e-04
## stat25      -2.026026e-04
## stat26      -5.461865e-05
## stat33      -6.567348e-05
## stat35      -2.033416e-05
## stat38       4.163937e-05
## stat41      -3.859030e-04
## stat45      -3.866762e-05
## stat48       8.810615e-06
## stat54      -1.757184e-07
## stat59       7.866764e-05
## stat60       8.085970e-06
## stat65      -1.834546e-04
## stat70       4.300945e-05
## stat73       3.196639e-05
## stat86       3.758349e-05
## stat89      -7.070199e-06
## stat91      -1.266341e-04
## stat96      -4.010251e-07
## stat98       3.235892e-03
## stat100      1.147835e-04
## stat110     -3.080214e-03
## stat130      1.815544e-04
## stat144      2.632748e-04
## stat146     -2.296307e-04
## stat147     -1.889099e-06
## stat149     -1.792711e-04
## stat156      7.789184e-05
## stat166     -5.466565e-05
## stat175     -1.862416e-04
## stat180     -1.663797e-04
## stat198     -6.511567e-08
## stat204     -1.922317e-04
## stat207      5.226865e-05
## stat213     -2.276448e-05
## stat214     -2.622186e-05
## x18.sqrt     2.456374e-02

Test

if (algo.LASSO.caret == TRUE){
  test.model(model.LASSO.caret, data.test
             ,method = 'glmnet',subopt = "LASSO"
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.042   2.085   2.097   2.097   2.108   2.139 
## [1] "glmnet LASSO Test MSE: 0.00104739061360084"

LARS with CV

Train

if (algo.LARS.caret == TRUE){
  set.seed(1)
  returned = train.caret.glmselect(formula = formula
                                   ,data =  data.train
                                   ,method = "lars"
                                   ,subopt = 'NULL'
                                   ,feature.names = feature.names)
  model.LARS.caret = returned$model
}
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo, : There were missing values in resampled
## performance measures.
## Aggregating results
## Selecting tuning parameters
## Fitting fraction = 0.384 on full training set
## Least Angle Regression 
## 
## 5584 samples
##  240 predictor
## 
## Pre-processing: centered (240), scaled (240) 
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 5026, 5026, 5026, 5025, 5025, 5026, ... 
## Resampling results across tuning parameters:
## 
##   fraction    RMSE        Rsquared   MAE       
##   0.00000000  0.03585074        NaN  0.02771086
##   0.01010101  0.03544188  0.1096104  0.02742321
##   0.02020202  0.03507687  0.1096104  0.02716467
##   0.03030303  0.03475710  0.1096104  0.02694238
##   0.04040404  0.03448412  0.1182772  0.02675612
##   0.05050505  0.03423647  0.1310634  0.02658569
##   0.06060606  0.03401173  0.1448227  0.02642057
##   0.07070707  0.03379604  0.1577741  0.02625639
##   0.08080808  0.03358850  0.1688316  0.02609598
##   0.09090909  0.03339255  0.1775365  0.02594327
##   0.10101010  0.03320840  0.1843373  0.02579742
##   0.11111111  0.03303625  0.1896267  0.02565866
##   0.12121212  0.03287754  0.1938795  0.02552706
##   0.13131313  0.03273113  0.1984520  0.02540406
##   0.14141414  0.03259208  0.2027354  0.02528693
##   0.15151515  0.03246282  0.2062307  0.02517520
##   0.16161616  0.03234346  0.2090687  0.02506927
##   0.17171717  0.03223599  0.2114361  0.02496995
##   0.18181818  0.03213989  0.2135030  0.02487723
##   0.19191919  0.03205252  0.2157609  0.02479417
##   0.20202020  0.03197263  0.2179263  0.02471703
##   0.21212121  0.03189582  0.2202754  0.02464334
##   0.22222222  0.03182709  0.2222345  0.02457592
##   0.23232323  0.03176676  0.2238983  0.02451602
##   0.24242424  0.03171555  0.2252527  0.02446447
##   0.25252525  0.03167271  0.2263736  0.02442224
##   0.26262626  0.03163388  0.2275004  0.02438282
##   0.27272727  0.03159940  0.2285528  0.02434910
##   0.28282828  0.03157258  0.2293418  0.02432399
##   0.29292929  0.03155020  0.2299920  0.02430222
##   0.30303030  0.03153097  0.2305637  0.02428376
##   0.31313131  0.03151493  0.2310113  0.02426823
##   0.32323232  0.03150084  0.2314032  0.02425458
##   0.33333333  0.03148914  0.2317196  0.02424369
##   0.34343434  0.03147959  0.2319635  0.02423469
##   0.35353535  0.03147204  0.2321366  0.02422760
##   0.36363636  0.03146727  0.2321815  0.02422147
##   0.37373737  0.03146451  0.2321394  0.02421704
##   0.38383838  0.03146357  0.2320228  0.02421425
##   0.39393939  0.03146418  0.2318430  0.02421297
##   0.40404040  0.03146604  0.2316160  0.02421268
##   0.41414141  0.03146853  0.2313690  0.02421315
##   0.42424242  0.03147116  0.2311285  0.02421388
##   0.43434343  0.03147425  0.2308761  0.02421511
##   0.44444444  0.03147797  0.2306000  0.02421768
##   0.45454545  0.03148260  0.2302889  0.02422125
##   0.46464646  0.03148811  0.2299426  0.02422567
##   0.47474747  0.03149436  0.2295655  0.02423091
##   0.48484848  0.03150098  0.2291816  0.02423591
##   0.49494949  0.03150817  0.2287783  0.02424101
##   0.50505051  0.03151597  0.2283530  0.02424633
##   0.51515152  0.03152425  0.2279114  0.02425211
##   0.52525253  0.03153286  0.2274629  0.02425788
##   0.53535354  0.03154196  0.2269958  0.02426372
##   0.54545455  0.03155130  0.2265222  0.02426962
##   0.55555556  0.03156104  0.2260369  0.02427578
##   0.56565657  0.03157118  0.2255389  0.02428238
##   0.57575758  0.03158151  0.2250400  0.02428927
##   0.58585859  0.03159192  0.2245450  0.02429640
##   0.59595960  0.03160257  0.2240443  0.02430357
##   0.60606061  0.03161343  0.2235402  0.02431078
##   0.61616162  0.03162394  0.2230582  0.02431733
##   0.62626263  0.03163465  0.2225724  0.02432406
##   0.63636364  0.03164545  0.2220872  0.02433086
##   0.64646465  0.03165637  0.2216011  0.02433800
##   0.65656566  0.03166729  0.2211212  0.02434531
##   0.66666667  0.03167825  0.2206451  0.02435272
##   0.67676768  0.03168941  0.2201645  0.02436042
##   0.68686869  0.03170068  0.2196833  0.02436829
##   0.69696970  0.03171186  0.2192113  0.02437609
##   0.70707071  0.03172306  0.2187423  0.02438406
##   0.71717172  0.03173460  0.2182616  0.02439237
##   0.72727273  0.03174633  0.2177758  0.02440098
##   0.73737374  0.03175820  0.2172888  0.02440974
##   0.74747475  0.03177009  0.2168052  0.02441848
##   0.75757576  0.03178206  0.2163225  0.02442729
##   0.76767677  0.03179406  0.2158438  0.02443587
##   0.77777778  0.03180631  0.2153591  0.02444456
##   0.78787879  0.03181863  0.2148755  0.02445323
##   0.79797980  0.03183120  0.2143856  0.02446209
##   0.80808081  0.03184402  0.2138881  0.02447101
##   0.81818182  0.03185697  0.2133890  0.02448027
##   0.82828283  0.03187007  0.2128876  0.02448968
##   0.83838384  0.03188334  0.2123831  0.02449927
##   0.84848485  0.03189694  0.2118687  0.02450917
##   0.85858586  0.03191055  0.2113570  0.02451905
##   0.86868687  0.03192424  0.2108451  0.02452906
##   0.87878788  0.03193801  0.2103347  0.02453932
##   0.88888889  0.03195200  0.2098187  0.02454985
##   0.89898990  0.03196604  0.2093054  0.02456024
##   0.90909091  0.03198009  0.2087963  0.02457058
##   0.91919192  0.03199442  0.2082788  0.02458120
##   0.92929293  0.03200879  0.2077636  0.02459188
##   0.93939394  0.03202299  0.2072601  0.02460251
##   0.94949495  0.03203722  0.2067602  0.02461313
##   0.95959596  0.03205167  0.2062548  0.02462390
##   0.96969697  0.03206621  0.2057499  0.02463481
##   0.97979798  0.03208080  0.2052478  0.02464587
##   0.98989899  0.03209529  0.2047544  0.02465698
##   1.00000000  0.03210993  0.2042585  0.02466824
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was fraction = 0.3838384.

##     fraction
## 39 0.3838384
## Warning: Removed 1 rows containing missing values (geom_point).

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## [1] "Coefficients"
##            x4            x7            x8            x9           x10           x11           x14           x16 
## -1.575481e-03  6.658898e-03  7.387385e-04  3.407229e-03  9.998359e-04  6.115442e-04 -6.635444e-06  1.545808e-03 
##           x17           x21         stat3         stat4        stat13        stat14        stat22        stat23 
##  1.876302e-03  4.007124e-04  1.827028e-04 -3.644477e-04 -6.000876e-04 -6.753247e-04 -2.250790e-04  3.680173e-04 
##        stat24        stat25        stat26        stat33        stat35        stat38        stat41        stat45 
## -2.944903e-04 -3.433677e-04 -8.779615e-05 -1.063788e-04 -2.832547e-05  6.574977e-05 -6.663140e-04 -5.990244e-05 
##        stat48        stat59        stat65        stat70        stat73        stat86        stat91        stat98 
##  6.456400e-06  1.306049e-04 -3.071217e-04  6.779608e-05  4.959550e-05  5.783115e-05 -2.149021e-04  5.670485e-03 
##       stat100       stat110       stat130       stat144       stat146       stat149       stat156       stat166 
##  1.926071e-04 -5.319444e-03  3.100836e-04  4.511700e-04 -3.897256e-04 -3.032107e-04  1.271650e-04 -9.046295e-05 
##       stat175       stat180       stat204       stat207       stat213       stat214      x18.sqrt 
## -3.156293e-04 -2.830228e-04 -3.288871e-04  8.351388e-05 -3.297079e-05 -3.888460e-05  1.118530e-02

Test

if (algo.LARS.caret == TRUE){
  test.model(model.LARS.caret, data.test
             ,method = 'lars',subopt = NULL
             ,formula = formula, feature.names = feature.names, label.names = label.names
             ,draw.limits = TRUE, transformation = t)
}
## [1] "Summary of predicted values: "
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.042   2.085   2.097   2.097   2.108   2.139 
## [1] "lars  Test MSE: 0.00104745935190979"

Session Info

sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows Server 2008 R2 x64 (build 7601) Service Pack 1
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.1252  LC_CTYPE=English_United States.1252    LC_MONETARY=English_United States.1252
## [4] LC_NUMERIC=C                           LC_TIME=English_United States.1252    
## 
## attached base packages:
## [1] parallel  stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] knitr_1.20                 htmltools_0.3.6            reshape2_1.4.3             lars_1.2                  
##  [5] doParallel_1.0.11          iterators_1.0.9            caret_6.0-81               leaps_3.0                 
##  [9] ggforce_0.2.1              rlist_0.4.6.1              car_3.0-2                  carData_3.0-2             
## [13] bestNormalize_1.3.0        scales_1.0.0               onewaytests_2.0            caTools_1.17.1.1          
## [17] mosaic_1.5.0               mosaicData_0.17.0          ggformula_0.9.1            ggstance_0.3.1            
## [21] lattice_0.20-38            DT_0.4                     ggiraphExtra_0.2.9         ggiraph_0.6.0             
## [25] investr_1.4.0              glmnet_2.0-16              foreach_1.4.4              Matrix_1.2-15             
## [29] MASS_7.3-51.1              PerformanceAnalytics_1.5.2 xts_0.10-2                 zoo_1.8-2                 
## [33] forcats_0.3.0              stringr_1.3.1              dplyr_0.8.0.1              purrr_0.2.5               
## [37] readr_1.1.1                tidyr_0.8.1                tibble_2.1.1               ggplot2_3.1.0             
## [41] tidyverse_1.2.1            usdm_1.1-18                raster_2.8-19              sp_1.3-1                  
## [45] pacman_0.5.1              
## 
## loaded via a namespace (and not attached):
##  [1] readxl_1.1.0       backports_1.1.2    plyr_1.8.4         lazyeval_0.2.1     splines_3.5.2      mycor_0.1.1       
##  [7] crosstalk_1.0.0    leaflet_2.0.2      digest_0.6.15      magrittr_1.5       mosaicCore_0.6.0   openxlsx_4.1.0    
## [13] recipes_0.1.4      modelr_0.1.2       gower_0.1.2        colorspace_1.3-2   rvest_0.3.2        ggrepel_0.8.0     
## [19] haven_1.1.2        crayon_1.3.4       jsonlite_1.5       survival_2.43-3    glue_1.3.1         polyclip_1.10-0   
## [25] registry_0.5-1     gtable_0.2.0       ppcor_1.1          ipred_0.9-6        sjmisc_2.7.9       abind_1.4-5       
## [31] rngtools_1.3.1     bibtex_0.4.2       Rcpp_1.0.1         xtable_1.8-2       foreign_0.8-71     stats4_3.5.2      
## [37] lava_1.6.2         prodlim_2018.04.18 htmlwidgets_1.2    httr_1.3.1         RColorBrewer_1.1-2 pkgconfig_2.0.2   
## [43] farver_1.1.0       nnet_7.3-12        labeling_0.3       tidyselect_0.2.5   rlang_0.3.1        later_0.7.3       
## [49] munsell_0.5.0      cellranger_1.1.0   tools_3.5.2        cli_1.0.1          generics_0.0.2     moments_0.14      
## [55] sjlabelled_1.0.17  broom_0.4.5        evaluate_0.10.1    ggdendro_0.1-20    yaml_2.1.19        ModelMetrics_1.1.0
## [61] zip_2.0.1          nlme_3.1-137       doRNG_1.7.1        mime_0.5           xml2_1.2.0         compiler_3.5.2    
## [67] rstudioapi_0.7     curl_3.2           tweenr_1.0.1       stringi_1.2.3      highr_0.7          gdtools_0.1.7     
## [73] psych_1.8.4        pillar_1.3.1       data.table_1.11.4  bitops_1.0-6       insight_0.1.2      httpuv_1.4.4.2    
## [79] R6_2.2.2           promises_1.0.1     gridExtra_2.3      rio_0.5.16         codetools_0.2-15   assertthat_0.2.0  
## [85] pkgmaker_0.27      rprojroot_1.3-2    withr_2.1.2        nortest_1.0-4      mnormt_1.5-5       mgcv_1.8-26       
## [91] hms_0.4.2          quadprog_1.5-5     grid_3.5.2         rpart_4.1-13       timeDate_3043.102  class_7.3-14      
## [97] rmarkdown_1.10     shiny_1.1.0        lubridate_1.7.4